From 231f5e72ec5d25d12a35a8456d6b8d23398c0bec Mon Sep 17 00:00:00 2001 From: Jacob Lifshay Date: Wed, 3 Dec 2025 21:27:26 -0800 Subject: [PATCH] WIP: completed stages of next-pc logic, still need to combine them into a pipeline --- Cargo.lock | 7 + Cargo.toml | 1 + crates/cpu/Cargo.toml | 1 + crates/cpu/src/next_pc.rs | 2241 ++++++++++++++++++------ crates/cpu/src/next_pc/next_pc.mermaid | 25 + crates/cpu/src/reg_alloc.rs | 3 +- crates/cpu/src/unit/alu_branch.rs | 5 +- crates/cpu/src/util/array_vec.rs | 12 + 8 files changed, 1708 insertions(+), 587 deletions(-) create mode 100644 crates/cpu/src/next_pc/next_pc.mermaid diff --git a/Cargo.lock b/Cargo.lock index 5d1e590..ad43a3c 100644 --- a/Cargo.lock +++ b/Cargo.lock @@ -211,6 +211,7 @@ version = "0.1.0" dependencies = [ "fayalite", "serde", + "simple-mermaid", ] [[package]] @@ -690,6 +691,12 @@ version = "1.3.0" source = "registry+https://github.com/rust-lang/crates.io-index" checksum = "0fda2ff0d084019ba4d7c6f371c95d8fd75ce3524c3cb8fb653a3023f6323e64" +[[package]] +name = "simple-mermaid" +version = "0.2.0" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "589144a964b4b30fe3a83b4bb1a09e2475aac194ec832a046a23e75bddf9eb29" + [[package]] name = "strsim" version = "0.11.1" diff --git a/Cargo.toml b/Cargo.toml index a3e74f0..00f2e67 100644 --- a/Cargo.toml +++ b/Cargo.toml @@ -16,6 +16,7 @@ rust-version = "1.89.0" [workspace.dependencies] fayalite = { git = "https://git.libre-chip.org/libre-chip/fayalite.git", version = "0.3.0", branch = "master" } serde = { version = "1.0.202", features = ["derive"] } +simple-mermaid = "0.2.0" [profile.dev] opt-level = 1 diff --git a/crates/cpu/Cargo.toml b/crates/cpu/Cargo.toml index 4dd85d8..2f5f84c 100644 --- a/crates/cpu/Cargo.toml +++ b/crates/cpu/Cargo.toml @@ -17,3 +17,4 @@ version.workspace = true [dependencies] fayalite.workspace = true serde.workspace = true +simple-mermaid.workspace = true diff --git a/crates/cpu/src/next_pc.rs b/crates/cpu/src/next_pc.rs index 7c62d72..f87ba35 100644 --- a/crates/cpu/src/next_pc.rs +++ b/crates/cpu/src/next_pc.rs @@ -8,50 +8,21 @@ //! after the `decode` stage there's a `post_decode` stage (that may run in the same clock cycle as `decode`) //! that checks that the fetched instructions' kinds match the predicted instruction kinds and that feeds //! information back to the `fetch` stage to cancel fetches that need to be predicted differently. +//! +#![doc = simple_mermaid::mermaid!("next_pc/next_pc.mermaid")] use crate::{ config::{CpuConfig, CpuConfigFetchWidth}, util::array_vec::ArrayVec, }; use fayalite::{ + expr::HdlPartialEqImpl, int::{UIntInRange, UIntInRangeInclusive, UIntInRangeType}, prelude::*, sim::value::SimOnlyValueTrait, util::ready_valid::ReadyValid, }; - -#[hdl] -pub enum PredictedCond { - Taken, - Fallthrough, -} - -#[hdl] -pub struct PredictedFallthrough {} - -#[hdl] -pub enum BranchPredictionKind { - Branch(HdlOption), - IndirectBranch(HdlOption), - Call(HdlOption), - IndirectCall(HdlOption), - Ret(HdlOption), -} - -#[hdl(get(|c| c.max_branches_per_fetch.get() - 1))] -pub type NextPcPredictionMaxBranchesBeforeLast> = DynSize; - -#[hdl(no_static)] -pub struct NextPcPrediction> { - pub fetch_pc: UInt<64>, - pub async_interrupt: Bool, - pub branches_before_last: ArrayVec< - BranchPredictionKind, - NextPcPredictionMaxBranchesBeforeLast, - >, - pub last_branch: HdlOption>, - pub last_branch_target_pc: UInt<64>, -} +use std::borrow::Cow; pub const FETCH_BLOCK_ID_WIDTH: usize = FetchBlockIdInt::BITS as usize; type FetchBlockIdInt = u8; @@ -70,7 +41,9 @@ pub struct NextPcToFetchInterface> { } #[hdl] -/// WIP version of decoded instruction just good enough to represent stuff needed for [`next_pc()`] since the actual instruction definition isn't finalized yet. This will be replaced at a later point. +/// WIP version of decoded instruction just good enough to represent stuff needed for [`next_pc()`] +/// since the actual instruction definition isn't finalized yet. +/// This will be replaced at a later point. pub enum WipDecodedInsnKind { NonBranch, Branch(UInt<64>), @@ -81,7 +54,8 @@ pub enum WipDecodedInsnKind { IndirectCall, Ret, RetCond, - /// not actually an instruction read from memory, covers stuff like external interrupts, page faults, memory errors, and so on. + /// not actually an instruction read from memory, covers stuff like external interrupts, + /// page faults, memory errors, and so on. Interrupt(UInt<64>), } @@ -107,7 +81,9 @@ impl WipDecodedInsnKind { } #[hdl] -/// WIP version of decoded instruction just good enough to represent stuff needed for [`next_pc()`] since the actual instruction definition isn't finalized yet. This will be replaced at a later point. +/// WIP version of decoded instruction just good enough to represent stuff needed for [`next_pc()`] +/// since the actual instruction definition isn't finalized yet. +/// This will be replaced at a later point. pub struct WipDecodedInsn { pub fetch_block_id: UInt<8>, pub id: UInt<12>, @@ -116,14 +92,39 @@ pub struct WipDecodedInsn { pub kind: WipDecodedInsnKind, } +#[hdl] +pub enum CallStackOp { + None, + Push(UInt<64>), + Pop, +} + #[hdl(no_static)] -/// handles updating speculative branch predictor state (e.g. branch histories) when instructions retire, -/// as well as updating state when a branch instruction is mis-speculated. -pub struct NextPcToRetireInterface> { - // TODO: add needed fields +pub struct RetireToNextPcInterfacePerInsn> { + pub id: UInt<12>, + /// the pc after running this instruction. + pub next_pc: UInt<64>, + pub call_stack_op: CallStackOp, + /// should be `HdlSome(taken)` for any conditional control-flow instruction + /// with an immediate target that can be predicted as taken/not-taken (branch/call/return). + pub cond_br_taken: HdlOption, pub config: C, } +#[hdl(no_static)] +pub struct RetireToNextPcInterfaceInner> { + pub insns: ArrayVec, CpuConfigFetchWidth>, + pub config: C, +} + +#[hdl(no_static)] +/// handles updating speculative branch predictor state (e.g. branch histories) +/// when instructions retire, as well as updating state when a +/// branch instruction is mis-speculated. +pub struct RetireToNextPcInterface> { + pub inner: ReadyValid>, +} + #[hdl(no_static)] pub struct DecodeToPostDecodeInterfaceInner> { pub insns: ArrayVec>, @@ -131,6 +132,18 @@ pub struct DecodeToPostDecodeInterfaceInner> { pub config: C, } +impl SimValueDefault for DecodeToPostDecodeInterfaceInner> { + #[hdl] + fn sim_value_default(self) -> SimValue { + let Self { insns, config } = self; + #[hdl(sim)] + Self { + insns: insns.sim_value_default(), + config, + } + } +} + #[hdl(no_static)] pub struct DecodeToPostDecodeInterface> { pub inner: ReadyValid>, @@ -142,6 +155,1357 @@ pub struct PostDecodeOutputInterface> { pub config: C, } +#[hdl] +struct TrainBranchPredictor { + branch_predictor_index: UIntInRange<0, { BRANCH_PREDICTOR_SIZE }>, + taken: Bool, +} + +#[hdl(no_static)] +struct Cancel> { + call_stack: CallStack, + start_pc: UInt<64>, + new_btb_entry: HdlOption, + btb_entry_index: HdlOption>, + branch_history: UInt<6>, + config: C, +} + +/// the output of `Stage::run`. +/// when cancelling operations, the returned [`StageOutput.cancel`] should be the state after +/// running all operations returned in [`StageOutput.output`] +#[hdl(no_static)] +struct StageOutput> { + outputs: ArrayVec, + cancel: HdlOption>, +} + +trait Stage: Type + SimValueDefault + ResetSteps { + type Inputs: Type; + type Output: Type; + type MaxOutputCount: Size; + + fn output_ty(config: PhantomConst) -> Self::Output; + fn max_output_count( + config: PhantomConst, + ) -> ::SizeType; + fn stage_output_ty( + config: PhantomConst, + ) -> StageOutput> { + StageOutput[Self::output_ty(config)][Self::max_output_count(config)][config] + } + fn run( + state: &mut SimValue, + inputs: &SimValue, + ) -> SimValue>>; + /// changes state to match `cancel` + fn cancel(state: &mut SimValue, cancel: &SimValue>>); +} + +#[hdl(no_static)] +struct NextPcStageOutput> { + start_pc: UInt<64>, + next_start_pc: UInt<64>, + btb_entry: HdlOption<( + UIntInRange<0, { BranchTargetBuffer::SIZE }>, + BTBEntryWithoutStartPc, + )>, + fetch_block_id: UInt<{ FETCH_BLOCK_ID_WIDTH }>, + start_call_stack: CallStack, + config: C, +} + +impl SimValueDefault for NextPcStageOutput> { + #[hdl] + fn sim_value_default(self) -> SimValue { + let Self { + start_pc: _, + next_start_pc: _, + btb_entry, + fetch_block_id: _, + start_call_stack, + config, + } = self; + #[hdl(sim)] + Self { + start_pc: 0u64, + next_start_pc: 0u64, + btb_entry: #[hdl(sim)] + btb_entry.HdlNone(), + fetch_block_id: 0u8, + start_call_stack: start_call_stack.sim_value_default(), + config, + } + } +} + +#[hdl(no_static)] +struct NextPcStageState> { + call_stack: CallStack, + branch_target_buffer: BranchTargetBuffer, + next_pc: UInt<64>, + next_fetch_block_id: UInt<{ FETCH_BLOCK_ID_WIDTH }>, + config: C, +} + +impl SimValueDefault for NextPcStageState> { + #[hdl] + fn sim_value_default(self) -> SimValue { + let Self { + call_stack, + branch_target_buffer, + next_pc: _, + next_fetch_block_id: _, + config, + } = self; + #[hdl(sim)] + Self { + call_stack: call_stack.sim_value_default(), + branch_target_buffer: branch_target_buffer.sim_value_default(), + // use something other than the default so you can see the reset progress + next_pc: !0u64, + // use something other than the default so you can see the reset progress + next_fetch_block_id: !0u8, + config, + } + } +} + +impl ResetSteps for NextPcStageState> { + #[hdl] + fn reset_step(this: &mut SimValue, step: usize) -> ResetStatus { + #[hdl(sim)] + let Self { + call_stack, + branch_target_buffer, + next_pc, + next_fetch_block_id, + config: _, + } = this; + **next_pc = 0u64.into(); // match Microwatt's reset PC + **next_fetch_block_id = 0u8.into(); + let call_stack = ResetSteps::reset_step(call_stack, step); + let branch_target_buffer = ResetSteps::reset_step(branch_target_buffer, step); + call_stack.and(branch_target_buffer) + } +} + +impl Stage for NextPcStageState> { + type Inputs = (); + type Output = NextPcStageOutput>; + type MaxOutputCount = ConstUsize<1>; + + fn output_ty(config: PhantomConst) -> Self::Output { + NextPcStageOutput[config] + } + + fn max_output_count( + _config: PhantomConst, + ) -> ::SizeType { + ConstUsize + } + + #[hdl] + fn run( + state: &mut SimValue, + _inputs: &SimValue, + ) -> SimValue>> { + let config = state.config.ty(); + let start_call_stack = state.call_stack.clone(); + let fetch_block_id = state.next_fetch_block_id.as_int(); + *state.next_fetch_block_id = state.next_fetch_block_id.as_int().wrapping_add(1).into(); + let start_pc = state.next_pc.as_int(); + let fetch_pc = start_pc & (!0u64 << config.get().log2_fetch_width_in_bytes); + + let btb_entry_index = state + .branch_target_buffer + .branch_pc_to_target_map + .iter() + .position(|entry| { + #[hdl(sim)] + match entry { + HdlNone => false, + HdlSome(entry) => entry.start_pc.as_int() == start_pc, + } + }); + let (next_start_pc, btb_entry) = if let Some(btb_entry_index) = btb_entry_index { + #[hdl(sim)] + let Self { + call_stack, + branch_target_buffer, + .. + } = state; + let entry = #[hdl(sim)] + match &branch_target_buffer.branch_pc_to_target_map[btb_entry_index] { + HdlSome(entry) => entry, + _ => unreachable!(), + }; + let next_start_pc = #[hdl(sim)] + match &entry.rest.insn_kind { + BTBEntryInsnKind::Branch => { + if BTBEntryAddrKind::taken(&entry.rest.addr_kind) { + BTBEntry::taken_pc(entry) + } else { + BTBEntry::not_taken_start_pc(entry) + } + } + BTBEntryInsnKind::Call => { + if BTBEntryAddrKind::taken(&entry.rest.addr_kind) { + CallStack::push(call_stack, BTBEntry::after_call_pc(entry)); + BTBEntry::taken_pc(entry) + } else { + BTBEntry::not_taken_start_pc(entry) + } + } + BTBEntryInsnKind::Ret => { + if BTBEntryAddrKind::taken(&entry.rest.addr_kind) { + CallStack::pop(call_stack).unwrap_or(BTBEntry::taken_pc(entry)) + } else { + BTBEntry::not_taken_start_pc(entry) + } + } + BTBEntryInsnKind::Unknown => unreachable!(), + }; + ( + next_start_pc, + #[hdl(sim)] + HdlSome((btb_entry_index, &entry.rest)), + ) + } else { + ( + fetch_pc.wrapping_add(config.get().fetch_width_in_bytes() as u64), + #[hdl(sim)] + HdlNone(), + ) + }; + let output = #[hdl(sim)] + NextPcStageOutput::<_> { + start_pc, + next_start_pc, + btb_entry, + fetch_block_id, + start_call_stack, + config, + }; + #[hdl(sim)] + StageOutput::<_, _, _> { + outputs: Self::stage_output_ty(config).outputs.new_full_sim([output]), + cancel: #[hdl(sim)] + (HdlOption[Cancel[config]]).HdlNone(), + } + } + + #[hdl] + fn cancel(state: &mut SimValue, cancel: &SimValue>>) { + #[hdl(sim)] + let Self { + call_stack, + branch_target_buffer, + next_pc, + next_fetch_block_id: _, + config: _, + } = state; + #[hdl(sim)] + let Cancel::<_> { + call_stack: new_call_stack, + start_pc, + new_btb_entry, + btb_entry_index, + branch_history: _, + config: _, + } = cancel; + call_stack.clone_from(new_call_stack); + next_pc.clone_from(start_pc); + #[hdl(sim)] + if let HdlSome(new_btb_entry) = new_btb_entry { + // add/update btb entry + + // get old entry if it's still there + let btb_entry_index = #[hdl(sim)] + if let HdlSome(btb_entry_index) = btb_entry_index { + #[hdl(sim)] + if let HdlSome(entry) = + &branch_target_buffer.branch_pc_to_target_map[**btb_entry_index] + { + if entry.start_pc == *start_pc { + // found the old entry + Some(**btb_entry_index) + } else { + None + } + } else { + None + } + } else { + None + }; + + let btb_entry_index = btb_entry_index.unwrap_or_else(|| { + // old entry isn't there, pick an entry to replace + BranchTargetBuffer::next_index_to_replace(branch_target_buffer) + }); + + // replace with new entry + branch_target_buffer.branch_pc_to_target_map[btb_entry_index] = #[hdl(sim)] + HdlSome( + #[hdl(sim)] + BTBEntry { + start_pc, + rest: new_btb_entry, + }, + ); + } else if let HdlSome(btb_entry_index) = btb_entry_index { + // remove btb entry if it's still there + let entry_mut = &mut branch_target_buffer.branch_pc_to_target_map[**btb_entry_index]; + #[hdl(sim)] + if let HdlSome(entry) = &entry_mut { + if entry.start_pc == *start_pc { + // we found it, remove it + *entry_mut = #[hdl(sim)] + HdlNone(); + } + } + } + } +} + +#[hdl(no_static)] +struct BrPredStageOutput> { + start_branch_history: UInt<6>, + branch_predictor_index: HdlOption>, + config: C, +} + +impl SimValueDefault for BrPredStageOutput> { + #[hdl] + fn sim_value_default(self) -> SimValue { + #[hdl(sim)] + Self { + start_branch_history: self.start_branch_history.zero(), + branch_predictor_index: #[hdl(sim)] + HdlNone(), + config: self.config, + } + } +} + +#[hdl(no_static)] +struct BrPredStageState> { + branch_history: UInt<6>, + branch_predictor: Array, + config: C, +} + +fn step_branch_history(branch_history: &mut SimValue>, taken: bool) { + **branch_history = + ((&**branch_history << 1) | taken.cast_to_static::>()).cast_to_static::>(); +} + +impl BrPredStageState> { + fn branch_predictor_index(this: &SimValue, branch_pc: u64) -> usize { + let mut t = this.branch_history.cast_to_static::>().as_int(); + t ^= t.rotate_left(5) & !branch_pc.rotate_right(3); + t ^= branch_pc; + t ^= !t.rotate_left(2) & t.rotate_left(4); + let mut retval = 0; + for i in (0..BRANCH_PREDICTOR_LOG2_SIZE).step_by(BRANCH_PREDICTOR_LOG2_SIZE) { + retval ^= t >> i; + } + retval as usize % BRANCH_PREDICTOR_SIZE + } +} + +impl SimValueDefault for BrPredStageState> { + #[hdl] + fn sim_value_default(self) -> SimValue { + let Self { + branch_history: _, + branch_predictor: _, + config, + } = self; + #[hdl(sim)] + Self { + // use something other than the default so you can see the reset progress + branch_history: (-1i8).cast_to_static::>(), + // use something other than the default so you can see the reset progress + branch_predictor: std::array::from_fn(|_| { + BranchPredictionState::towards_not_taken(&BranchPredictionState.sim_value_default()) + }), + config, + } + } +} + +impl ResetSteps for BrPredStageState> { + #[hdl] + fn reset_step(this: &mut SimValue, step: usize) -> ResetStatus { + #[hdl(sim)] + let Self { + branch_history, + branch_predictor, + config: _, + } = this; + **branch_history = 0u8.cast_to_static::>(); + ResetSteps::reset_step(branch_predictor, step) + } +} + +impl Stage for BrPredStageState> { + type Inputs = NextPcStageOutput>; + type Output = BrPredStageOutput>; + type MaxOutputCount = ConstUsize<1>; + + fn output_ty(config: PhantomConst) -> Self::Output { + BrPredStageOutput[config] + } + + fn max_output_count( + _config: PhantomConst, + ) -> ::SizeType { + ConstUsize + } + + #[hdl] + fn run( + state: &mut SimValue, + inputs: &SimValue, + ) -> SimValue>> { + let config = state.config.ty(); + #[hdl(sim)] + let NextPcStageOutput::<_> { + start_pc, + next_start_pc: _, + btb_entry, + fetch_block_id: _, + start_call_stack, + config: _, + } = inputs; + let start_branch_history = state.branch_history.clone(); + let mut branch_predictor_index = #[hdl(sim)] + HdlNone(); + #[hdl(sim)] + if let HdlSome(btb_entry) = btb_entry { + let taken_and_opposite_addr_kind = #[hdl(sim)] + match &btb_entry.1.addr_kind { + BTBEntryAddrKind::Unconditional | BTBEntryAddrKind::Indirect => None, + BTBEntryAddrKind::CondTaken => Some(( + true, + #[hdl(sim)] + BTBEntryAddrKind::CondNotTaken(), + )), + BTBEntryAddrKind::CondNotTaken => Some(( + false, + #[hdl(sim)] + BTBEntryAddrKind::CondTaken(), + )), + }; + if let Some((taken, opposite_addr_kind)) = taken_and_opposite_addr_kind { + let index = Self::branch_predictor_index( + state, + BTBEntry::branch_pc( + &#[hdl(sim)] + BTBEntry { + start_pc, + rest: &btb_entry.1, + }, + ), + ); + if taken != BranchPredictionState::is_taken(&state.branch_predictor[index]) { + let btb_entry_index = &btb_entry.0; + let mut btb_entry = btb_entry.1.clone(); + btb_entry.addr_kind = opposite_addr_kind; + let StageOutput { outputs, cancel } = Self::stage_output_ty(config); + let retval = #[hdl(sim)] + StageOutput::<_, _, _> { + outputs: outputs.sim_value_default(), + cancel: #[hdl(sim)] + cancel.HdlSome( + #[hdl(sim)] + Cancel::<_> { + call_stack: start_call_stack, + start_pc, + new_btb_entry: #[hdl(sim)] + HdlSome(btb_entry), + btb_entry_index: #[hdl(sim)] + HdlSome(btb_entry_index), + branch_history: start_branch_history, + config, + }, + ), + }; + return retval; + } + branch_predictor_index = #[hdl(sim)] + HdlSome(index.cast_to_static::>()); + step_branch_history(&mut state.branch_history, taken); + } + } + let output = #[hdl(sim)] + BrPredStageOutput::<_> { + start_branch_history, + branch_predictor_index, + config, + }; + #[hdl(sim)] + StageOutput::<_, _, _> { + outputs: Self::stage_output_ty(config).outputs.new_full_sim([output]), + cancel: #[hdl(sim)] + (HdlOption[Cancel[config]]).HdlNone(), + } + } + + #[hdl] + fn cancel(state: &mut SimValue, cancel: &SimValue>>) { + #[hdl(sim)] + let Cancel::<_> { + call_stack: _, + start_pc: _, + new_btb_entry: _, + btb_entry_index: _, + branch_history, + config: _, + } = cancel; + state.branch_history.clone_from(branch_history); + } +} + +impl BrPredStageState> { + #[hdl] + fn train_branch_predictor( + this: &mut SimValue, + train_branch_predictor: &SimValue, + ) { + #[hdl(sim)] + let TrainBranchPredictor { + branch_predictor_index, + taken, + } = train_branch_predictor; + let branch_prediction_state = &mut this.branch_predictor[**branch_predictor_index]; + if **taken { + *branch_prediction_state = + BranchPredictionState::towards_taken(branch_prediction_state); + } else { + *branch_prediction_state = + BranchPredictionState::towards_not_taken(branch_prediction_state); + } + } +} + +#[hdl(no_static)] +struct FetchDecodeStageState> { + config: C, +} + +impl SimValueDefault for FetchDecodeStageState> { + #[hdl] + fn sim_value_default(self) -> SimValue { + #[hdl(sim)] + Self { + config: self.config, + } + } +} + +impl ResetSteps for FetchDecodeStageState> { + #[hdl] + fn reset_step(this: &mut SimValue, _step: usize) -> ResetStatus { + #[hdl(sim)] + let Self { config: _ } = this; + ResetStatus::Done + } +} + +#[hdl(no_static)] +struct FetchDecodeStageOutput> { + next_pc_stage_output: NextPcStageOutput, + decode_output: DecodeToPostDecodeInterfaceInner, +} + +impl SimValueDefault for FetchDecodeStageOutput> { + #[hdl] + fn sim_value_default(self) -> SimValue { + let Self { + next_pc_stage_output, + decode_output, + } = self; + #[hdl(sim)] + Self { + next_pc_stage_output: next_pc_stage_output.sim_value_default(), + decode_output: decode_output.sim_value_default(), + } + } +} + +impl Stage for FetchDecodeStageState> { + type Inputs = FetchDecodeStageOutput>; + type Output = FetchDecodeStageOutput>; + type MaxOutputCount = ConstUsize<1>; + + fn output_ty(config: PhantomConst) -> Self::Output { + FetchDecodeStageOutput[config] + } + + fn max_output_count( + _config: PhantomConst, + ) -> ::SizeType { + ConstUsize + } + + #[hdl] + fn run( + state: &mut SimValue, + inputs: &SimValue, + ) -> SimValue>> { + #[hdl(sim)] + let Self { config } = state; + let config = config.ty(); + let StageOutput { outputs, cancel } = Self::stage_output_ty(config); + #[hdl(sim)] + StageOutput::<_, _, _> { + outputs: outputs.new_full_sim([inputs]), + cancel: #[hdl(sim)] + cancel.HdlNone(), + } + } + + #[hdl] + fn cancel(state: &mut SimValue, _cancel: &SimValue>>) { + #[hdl(sim)] + let Self { config: _ } = state; + } +} + +#[hdl(no_static)] +struct PostDecodeStageState> { + config: C, +} + +#[hdl(no_static)] +struct PostDecodeStageOutput> { + insn: WipDecodedInsn, + next_pc: UInt<64>, + btb_entry_index: HdlOption>, + start_branch_history: UInt<6>, + start_call_stack: CallStack, + branch_predictor_index: HdlOption>, + config: C, +} + +impl SimValueDefault for PostDecodeStageOutput> { + #[hdl] + fn sim_value_default(self) -> SimValue { + #[hdl(sim)] + Self { + insn: self.insn.sim_value_default(), + next_pc: 0u64, + btb_entry_index: #[hdl(sim)] + HdlNone(), + start_branch_history: self.start_branch_history.zero(), + start_call_stack: self.start_call_stack.sim_value_default(), + branch_predictor_index: #[hdl(sim)] + HdlNone(), + config: self.config, + } + } +} + +impl SimValueDefault for PostDecodeStageState> { + #[hdl] + fn sim_value_default(self) -> SimValue { + #[hdl(sim)] + Self { + config: self.config, + } + } +} + +impl ResetSteps for PostDecodeStageState> { + #[hdl] + fn reset_step(this: &mut SimValue, _step: usize) -> ResetStatus { + #[hdl(sim)] + let Self { config: _ } = this; + ResetStatus::Done + } +} + +impl Stage for PostDecodeStageState> { + type Inputs = ( + FetchDecodeStageOutput>, + BrPredStageOutput>, + ); + type Output = PostDecodeStageOutput>; + type MaxOutputCount = CpuConfigFetchWidth>; + + fn output_ty(config: PhantomConst) -> Self::Output { + PostDecodeStageOutput[config] + } + + fn max_output_count( + config: PhantomConst, + ) -> ::SizeType { + CpuConfigFetchWidth[config] + } + + #[hdl] + fn run( + state: &mut SimValue, + inputs: &SimValue, + ) -> SimValue>> { + #[hdl(sim)] + let Self { config } = state; + let config = config.ty(); + #[hdl(sim)] + let (fetch_decode_stage_output, br_pred_stage_output) = inputs; + #[hdl(sim)] + let FetchDecodeStageOutput::<_> { + next_pc_stage_output, + decode_output, + } = fetch_decode_stage_output; + #[hdl(sim)] + let NextPcStageOutput::<_> { + start_pc, + next_start_pc: predicted_next_start_pc, + btb_entry: predicted_btb_entry, + fetch_block_id, + start_call_stack, + config: _, + } = next_pc_stage_output; + #[hdl(sim)] + let DecodeToPostDecodeInterfaceInner::<_> { insns, config: _ } = decode_output; + #[hdl(sim)] + let BrPredStageOutput::<_> { + start_branch_history, + branch_predictor_index, + config: _, + } = br_pred_stage_output; + assert_ne!( + **ArrayVec::len_sim(&insns), + 0, + "fetch/decode must always return at least one instruction \ + -- either the decoded instructions or a WipDecodedInsnKind::Interrupt", + ); + let insns = ArrayVec::elements_sim_ref(&insns); + let StageOutput { + outputs: outputs_ty, + cancel: cancel_ty, + } = Self::stage_output_ty(config); + assert_eq!(outputs_ty.capacity(), decode_output.insns.ty().capacity()); + let mut outputs = outputs_ty.sim_value_default(); + let mut add_output_insn = |insn: &SimValue, + next_pc: Option, + can_train_cond_branch_predictor: bool, + fallthrough_offset: &mut u8| { + ArrayVec::try_push_sim( + &mut outputs, + #[hdl(sim)] + PostDecodeStageOutput::<_> { + insn, + next_pc: next_pc.unwrap_or_else(|| { + insn.pc + .as_int() + .wrapping_add(insn.size_in_bytes.cast_to_static::>().as_int()) + }), + btb_entry_index: #[hdl(sim)] + match predicted_btb_entry { + HdlSome(predicted_btb_entry) => + { + #[hdl(sim)] + HdlSome(&predicted_btb_entry.0) + } + HdlNone => + { + #[hdl(sim)] + HdlNone() + } + }, + start_branch_history, + start_call_stack, + branch_predictor_index: if can_train_cond_branch_predictor { + branch_predictor_index.clone() + } else { + #[hdl(sim)] + HdlNone() + }, + config, + }, + ) + .expect("known to be in bounds"); + *fallthrough_offset += insn.size_in_bytes.cast_to_static::>().as_int(); + }; + if let Some(target_pc) = WipDecodedInsnKind::interrupt_target_pc_sim(&insns[0].kind) { + add_output_insn(&insns[0], Some(target_pc.as_int()), false, &mut 0); + let mut call_stack = start_call_stack.clone(); + CallStack::push(&mut call_stack, start_pc); + let retval = #[hdl(sim)] + StageOutput::<_, _, _> { + outputs, + cancel: #[hdl(sim)] + cancel_ty.HdlSome( + #[hdl(sim)] + Cancel::<_> { + call_stack, + start_pc: target_pc, + new_btb_entry: #[hdl(sim)] + HdlNone(), + btb_entry_index: #[hdl(sim)] + HdlNone(), + branch_history: start_branch_history, + config, + }, + ), + }; + return retval; + } + let mut fallthrough_offset = 0u8; + let mut branch_offset = 0u8; + let mut after_call_offset = 0u8; + let mut btb_entry_fields = None; + for insn in insns { + #[hdl(sim)] + let WipDecodedInsn { + fetch_block_id: insn_fetch_block_id, + id: _, + pc, + size_in_bytes: _, + kind, + } = insn; + assert_eq!( + insn_fetch_block_id, fetch_block_id, + "fetch decode pipeline's output isn't in-sync with fetching_queue", + ); + let guess_branch_addr_kind = |fallback_taken| { + #[hdl(sim)] + if let HdlSome(entry) = predicted_btb_entry { + let addr_kind = &entry.1.addr_kind; + #[hdl(sim)] + match addr_kind { + BTBEntryAddrKind::Unconditional | BTBEntryAddrKind::Indirect => {} + BTBEntryAddrKind::CondTaken | BTBEntryAddrKind::CondNotTaken => { + return addr_kind.clone(); + } + } + } + if fallback_taken { + #[hdl(sim)] + BTBEntryAddrKind::CondTaken() + } else { + #[hdl(sim)] + BTBEntryAddrKind::CondNotTaken() + } + }; + let insn_kind; + let addr_kind; + let can_train_cond_branch_predictor; + let target_pc = #[hdl(sim)] + match kind { + WipDecodedInsnKind::NonBranch => { + add_output_insn(insn, None, false, &mut fallthrough_offset); + continue; + } + WipDecodedInsnKind::Branch(target_pc) => { + insn_kind = #[hdl(sim)] + BTBEntryInsnKind::Branch(); + addr_kind = #[hdl(sim)] + BTBEntryAddrKind::Unconditional(); + can_train_cond_branch_predictor = false; + Some(target_pc.as_int()) + } + WipDecodedInsnKind::BranchCond(target_pc) => { + insn_kind = #[hdl(sim)] + BTBEntryInsnKind::Branch(); + // guess backwards branches are taken and forwards branches are not + addr_kind = guess_branch_addr_kind(target_pc.as_int() <= pc.as_int()); + can_train_cond_branch_predictor = true; + Some(target_pc.as_int()) + } + WipDecodedInsnKind::IndirectBranch => { + insn_kind = #[hdl(sim)] + BTBEntryInsnKind::Branch(); + addr_kind = #[hdl(sim)] + BTBEntryAddrKind::Indirect(); + can_train_cond_branch_predictor = false; + None + } + WipDecodedInsnKind::Call(target_pc) => { + insn_kind = #[hdl(sim)] + BTBEntryInsnKind::Call(); + addr_kind = #[hdl(sim)] + BTBEntryAddrKind::Unconditional(); + can_train_cond_branch_predictor = false; + Some(target_pc.as_int()) + } + WipDecodedInsnKind::CallCond(target_pc) => { + insn_kind = #[hdl(sim)] + BTBEntryInsnKind::Call(); + // guess conditional calls are taken + addr_kind = guess_branch_addr_kind(true); + can_train_cond_branch_predictor = true; + Some(target_pc.as_int()) + } + WipDecodedInsnKind::IndirectCall => { + insn_kind = #[hdl(sim)] + BTBEntryInsnKind::Call(); + addr_kind = #[hdl(sim)] + BTBEntryAddrKind::Indirect(); + can_train_cond_branch_predictor = false; + None + } + WipDecodedInsnKind::Ret => { + insn_kind = #[hdl(sim)] + BTBEntryInsnKind::Ret(); + addr_kind = #[hdl(sim)] + BTBEntryAddrKind::Unconditional(); + can_train_cond_branch_predictor = false; + None + } + WipDecodedInsnKind::RetCond => { + insn_kind = #[hdl(sim)] + BTBEntryInsnKind::Ret(); + // guess conditional returns are taken + addr_kind = guess_branch_addr_kind(true); + can_train_cond_branch_predictor = true; + None + } + WipDecodedInsnKind::Interrupt(_) => { + // interrupt after other instructions, + // just truncate the fetch block before the interrupt + break; + } + WipDecodedInsnKind::Unknown => unreachable!(), + }; + + // all branches/calls/returns end up here + + if btb_entry_fields.is_some() { + // TODO: maybe implement handling multiple ctrl transfer insns in the same fetch block, + // for now we just truncate the fetch block right before the second ctrl transfer insn. + break; + } + branch_offset = fallthrough_offset; + let target_pc = target_pc.unwrap_or_else(|| predicted_next_start_pc.as_int()); + add_output_insn( + insn, + Some(target_pc), + can_train_cond_branch_predictor, + &mut fallthrough_offset, + ); + #[hdl(sim)] + match &insn_kind { + BTBEntryInsnKind::Call => after_call_offset = fallthrough_offset, + BTBEntryInsnKind::Branch | BTBEntryInsnKind::Ret | BTBEntryInsnKind::Unknown => {} + } + btb_entry_fields = Some((insn_kind, addr_kind, target_pc)); + } + let new_btb_entry = if let Some((insn_kind, addr_kind, target_pc)) = btb_entry_fields { + #[hdl(sim)] + HdlSome( + #[hdl(sim)] + BTBEntryWithoutStartPc { + target_pc, + fallthrough_offset, + branch_offset, + after_call_offset, + insn_kind, + addr_kind, + }, + ) + } else { + #[hdl(sim)] + HdlNone() + }; + let (btb_entry_index, predicted_btb_entry) = #[hdl(sim)] + match predicted_btb_entry { + HdlSome(predicted_btb_entry) => { + #[hdl(sim)] + let (btb_entry_index, predicted_btb_entry) = predicted_btb_entry; + ( + #[hdl(sim)] + HdlSome(btb_entry_index), + #[hdl(sim)] + HdlSome(predicted_btb_entry), + ) + } + HdlNone => ( + #[hdl(sim)] + HdlNone(), + #[hdl(sim)] + HdlNone(), + ), + }; + if *new_btb_entry.cmp_ne(predicted_btb_entry) { + #[hdl(sim)] + StageOutput::<_, _, _> { + outputs: outputs_ty.sim_value_default(), + cancel: #[hdl(sim)] + cancel_ty.HdlSome( + #[hdl(sim)] + Cancel::<_> { + call_stack: start_call_stack, + start_pc, + new_btb_entry, + btb_entry_index, + branch_history: start_branch_history, + config, + }, + ), + } + } else { + #[hdl(sim)] + StageOutput::<_, _, _> { + outputs, + cancel: #[hdl(sim)] + cancel_ty.HdlNone(), + } + } + } + + #[hdl] + fn cancel(state: &mut SimValue, _cancel: &SimValue>>) { + #[hdl(sim)] + let Self { config: _ } = state; + } +} + +#[hdl(no_static)] +struct RenameDispatchExecuteStageState> { + config: C, +} + +#[hdl(no_static)] +struct RenameDispatchExecuteStageOutput> { + post_decode_stage_output: PostDecodeStageOutput, +} + +impl SimValueDefault for RenameDispatchExecuteStageOutput> { + #[hdl] + fn sim_value_default(self) -> SimValue { + let Self { + post_decode_stage_output, + } = self; + #[hdl(sim)] + Self { + post_decode_stage_output: post_decode_stage_output.sim_value_default(), + } + } +} + +impl SimValueDefault for RenameDispatchExecuteStageState> { + #[hdl] + fn sim_value_default(self) -> SimValue { + let Self { config } = self; + #[hdl(sim)] + Self { config } + } +} + +impl ResetSteps for RenameDispatchExecuteStageState> { + #[hdl] + fn reset_step(this: &mut SimValue, _step: usize) -> ResetStatus { + #[hdl(sim)] + let Self { config: _ } = this; + ResetStatus::Done + } +} + +impl Stage for RenameDispatchExecuteStageState> { + type Inputs = RenameDispatchExecuteStageOutput>; + type Output = RenameDispatchExecuteStageOutput>; + type MaxOutputCount = ConstUsize<1>; + + fn output_ty(config: PhantomConst) -> Self::Output { + RenameDispatchExecuteStageOutput[config] + } + + fn max_output_count( + _config: PhantomConst, + ) -> ::SizeType { + ConstUsize + } + + #[hdl] + fn run( + state: &mut SimValue, + inputs: &SimValue, + ) -> SimValue>> { + #[hdl(sim)] + let Self { config } = state; + let config = config.ty(); + let StageOutput { outputs, cancel } = Self::stage_output_ty(config); + #[hdl(sim)] + StageOutput::<_, _, _> { + outputs: outputs.new_full_sim([inputs]), + cancel: #[hdl(sim)] + cancel.HdlNone(), + } + } + + #[hdl] + fn cancel(state: &mut SimValue, _cancel: &SimValue>>) { + #[hdl(sim)] + let Self { config: _ } = state; + } +} + +#[hdl(no_static)] +struct RetireStageInput> { + rename_dispatch_execute_stage_output: RenameDispatchExecuteStageOutput, + retire_interface_per_insn: RetireToNextPcInterfacePerInsn, +} + +#[hdl(no_static)] +struct RetireStageState> { + config: C, +} + +#[hdl(no_static)] +struct RetireStageOutput> { + train_branch_predictor: HdlOption, + config: C, +} + +impl SimValueDefault for RetireStageState> { + #[hdl] + fn sim_value_default(self) -> SimValue { + let Self { config } = self; + #[hdl(sim)] + Self { config } + } +} + +impl ResetSteps for RetireStageState> { + #[hdl] + fn reset_step(this: &mut SimValue, _step: usize) -> ResetStatus { + #[hdl(sim)] + let Self { config: _ } = this; + ResetStatus::Done + } +} + +impl Stage for RetireStageState> { + type Inputs = RetireStageInput>; + type Output = RetireStageOutput>; + type MaxOutputCount = ConstUsize<1>; + + fn output_ty(config: PhantomConst) -> Self::Output { + RetireStageOutput[config] + } + + fn max_output_count( + _config: PhantomConst, + ) -> ::SizeType { + ConstUsize + } + + #[hdl] + fn run( + state: &mut SimValue, + inputs: &SimValue, + ) -> SimValue>> { + #[hdl(sim)] + let Self { config } = state; + let config = config.ty(); + #[hdl(sim)] + let RetireStageInput::<_> { + rename_dispatch_execute_stage_output, + retire_interface_per_insn, + } = inputs; + #[hdl(sim)] + let RetireToNextPcInterfacePerInsn::<_> { + id, + next_pc, + call_stack_op, + cond_br_taken, + config: _, + } = retire_interface_per_insn; + #[hdl(sim)] + let RenameDispatchExecuteStageOutput::<_> { + post_decode_stage_output, + } = rename_dispatch_execute_stage_output; + #[hdl(sim)] + let PostDecodeStageOutput::<_> { + insn, + next_pc: predicted_next_pc, + btb_entry_index, + start_branch_history, + start_call_stack, + branch_predictor_index, + config: _, + } = post_decode_stage_output; + assert_eq!(*id, insn.id, "instruction queuing out of sync"); + let StageOutput { + outputs: outputs_ty, + cancel: cancel_ty, + } = Self::stage_output_ty(config); + let mut branch_history = start_branch_history.clone(); + let train_branch_predictor = #[hdl(sim)] + if let HdlSome(taken) = cond_br_taken { + step_branch_history(&mut branch_history, **taken); + #[hdl(sim)] + if let HdlSome(branch_predictor_index) = branch_predictor_index { + #[hdl(sim)] + HdlSome( + #[hdl(sim)] + TrainBranchPredictor { + branch_predictor_index, + taken, + }, + ) + } else { + unreachable!() + } + } else { + #[hdl(sim)] + HdlNone() + }; + if next_pc != predicted_next_pc { + let cond_addr_kind = || { + #[hdl(sim)] + if let HdlSome(cond_br_taken) = cond_br_taken { + if **cond_br_taken { + #[hdl(sim)] + BTBEntryAddrKind::CondTaken() + } else { + #[hdl(sim)] + BTBEntryAddrKind::CondNotTaken() + } + } else { + unreachable!(); + } + }; + let make_btb_entry = + |after_call_offset: u8, + insn_kind: SimValue, + addr_kind: SimValue| { + #[hdl(sim)] + HdlSome( + #[hdl(sim)] + BTBEntryWithoutStartPc { + target_pc: next_pc, + fallthrough_offset: insn.size_in_bytes.cast_to_static::>(), + branch_offset: 0u8, + after_call_offset, + insn_kind, + addr_kind, + }, + ) + }; + let new_btb_entry = #[hdl(sim)] + match &insn.kind { + WipDecodedInsnKind::Branch(_) => make_btb_entry( + 0, + #[hdl(sim)] + BTBEntryInsnKind::Branch(), + #[hdl(sim)] + BTBEntryAddrKind::Unconditional(), + ), + WipDecodedInsnKind::BranchCond(_) => make_btb_entry( + 0, + #[hdl(sim)] + BTBEntryInsnKind::Branch(), + cond_addr_kind(), + ), + WipDecodedInsnKind::IndirectBranch => make_btb_entry( + 0, + #[hdl(sim)] + BTBEntryInsnKind::Branch(), + #[hdl(sim)] + BTBEntryAddrKind::Indirect(), + ), + WipDecodedInsnKind::Call(_) => make_btb_entry( + 0, + #[hdl(sim)] + BTBEntryInsnKind::Call(), + #[hdl(sim)] + BTBEntryAddrKind::Unconditional(), + ), + WipDecodedInsnKind::CallCond(_) => make_btb_entry( + 0, + #[hdl(sim)] + BTBEntryInsnKind::Call(), + cond_addr_kind(), + ), + WipDecodedInsnKind::IndirectCall => make_btb_entry( + 0, + #[hdl(sim)] + BTBEntryInsnKind::Call(), + #[hdl(sim)] + BTBEntryAddrKind::Indirect(), + ), + WipDecodedInsnKind::Ret => make_btb_entry( + 0, + #[hdl(sim)] + BTBEntryInsnKind::Ret(), + #[hdl(sim)] + BTBEntryAddrKind::Unconditional(), + ), + WipDecodedInsnKind::RetCond => make_btb_entry( + 0, + #[hdl(sim)] + BTBEntryInsnKind::Ret(), + cond_addr_kind(), + ), + WipDecodedInsnKind::NonBranch | WipDecodedInsnKind::Interrupt(_) => + { + #[hdl(sim)] + HdlNone() + } + WipDecodedInsnKind::Unknown => unreachable!(), + }; + let mut call_stack = start_call_stack.clone(); + #[hdl(sim)] + match call_stack_op { + CallStackOp::None => {} + CallStackOp::Push(pc) => CallStack::push(&mut call_stack, pc), + CallStackOp::Pop => { + CallStack::pop(&mut call_stack); + } + CallStackOp::Unknown => unreachable!(), + } + #[hdl(sim)] + StageOutput::<_, _, _> { + outputs: outputs_ty.new_sim( + #[hdl(sim)] + RetireStageOutput::<_> { + train_branch_predictor, + config, + }, + ), + cancel: #[hdl(sim)] + cancel_ty.HdlSome( + #[hdl(sim)] + Cancel::<_> { + call_stack, + start_pc: next_pc, + new_btb_entry, + btb_entry_index, + branch_history, + config, + }, + ), + } + } else { + #[hdl(sim)] + StageOutput::<_, _, _> { + outputs: outputs_ty.new_full_sim([ + #[hdl(sim)] + RetireStageOutput::<_> { + train_branch_predictor, + config, + }, + ]), + cancel: #[hdl(sim)] + cancel_ty.HdlNone(), + } + } + } + + #[hdl] + fn cancel(state: &mut SimValue, _cancel: &SimValue>>) { + #[hdl(sim)] + let Self { config: _ } = state; + } +} + #[hdl] enum BranchPredictionState { StronglyNotTaken, @@ -221,6 +1585,12 @@ impl SimValueDefault for SimOnly { } } +impl SimValueDefault for ArrayVec { + fn sim_value_default(self) -> SimValue { + self.new_sim(self.element().sim_value_default()) + } +} + impl SimValueDefault for HdlOption { fn sim_value_default(self) -> SimValue { self.HdlNone().to_sim_value_with_type(self) @@ -239,6 +1609,35 @@ impl SimValueDefault for UIntType { } } +impl SimValueDefault for WipDecodedInsnKind { + #[hdl] + fn sim_value_default(self) -> SimValue { + #[hdl(sim)] + WipDecodedInsnKind::NonBranch() + } +} + +impl SimValueDefault for WipDecodedInsn { + #[hdl] + fn sim_value_default(self) -> SimValue { + let Self { + fetch_block_id, + id, + pc, + size_in_bytes, + kind, + } = self; + #[hdl(sim)] + WipDecodedInsn { + fetch_block_id: fetch_block_id.sim_value_default(), + id: id.sim_value_default(), + pc: pc.sim_value_default(), + size_in_bytes: size_in_bytes.sim_value_default(), + kind: kind.sim_value_default(), + } + } +} + trait ResetSteps: Type { fn reset_step(this: &mut SimValue, step: usize) -> ResetStatus; } @@ -328,6 +1727,43 @@ enum BTBEntryInsnKind { Ret, } +// TODO: replace with #[hdl(cmp_eq)] when that's implemented for enums +impl HdlPartialEqImpl for BTBEntryInsnKind { + #[track_caller] + fn cmp_value_eq( + lhs: Self, + lhs_value: Cow<'_, Self::SimValue>, + rhs: Self, + rhs_value: Cow<'_, Self::SimValue>, + ) -> bool { + *Self::cmp_sim_value_eq( + Cow::Owned(SimValue::from_value(lhs, lhs_value.into_owned())), + Cow::Owned(SimValue::from_value(rhs, rhs_value.into_owned())), + ) + } + + #[track_caller] + fn cmp_sim_value_eq( + lhs: Cow<'_, SimValue>, + rhs: Cow<'_, SimValue>, + ) -> SimValue { + (SimValue::bits(&*lhs) == SimValue::bits(&*rhs)).to_sim_value() + } + + #[track_caller] + fn cmp_sim_value_ne( + lhs: Cow<'_, SimValue>, + rhs: Cow<'_, SimValue>, + ) -> SimValue { + (SimValue::bits(&*lhs) != SimValue::bits(&*rhs)).to_sim_value() + } + + #[track_caller] + fn cmp_expr_eq(lhs: Expr, rhs: Expr) -> Expr { + lhs.cast_to_bits().cmp_eq(rhs.cast_to_bits()) + } +} + impl BTBEntryInsnKind { #[hdl] fn try_from_decoded_insn_kind(kind: &SimValue) -> Option> { @@ -364,6 +1800,43 @@ enum BTBEntryAddrKind { CondNotTaken, } +// TODO: replace with #[hdl(cmp_eq)] when that's implemented for enums +impl HdlPartialEqImpl for BTBEntryAddrKind { + #[track_caller] + fn cmp_value_eq( + lhs: Self, + lhs_value: Cow<'_, Self::SimValue>, + rhs: Self, + rhs_value: Cow<'_, Self::SimValue>, + ) -> bool { + *Self::cmp_sim_value_eq( + Cow::Owned(SimValue::from_value(lhs, lhs_value.into_owned())), + Cow::Owned(SimValue::from_value(rhs, rhs_value.into_owned())), + ) + } + + #[track_caller] + fn cmp_sim_value_eq( + lhs: Cow<'_, SimValue>, + rhs: Cow<'_, SimValue>, + ) -> SimValue { + (SimValue::bits(&*lhs) == SimValue::bits(&*rhs)).to_sim_value() + } + + #[track_caller] + fn cmp_sim_value_ne( + lhs: Cow<'_, SimValue>, + rhs: Cow<'_, SimValue>, + ) -> SimValue { + (SimValue::bits(&*lhs) != SimValue::bits(&*rhs)).to_sim_value() + } + + #[track_caller] + fn cmp_expr_eq(lhs: Expr, rhs: Expr) -> Expr { + lhs.cast_to_bits().cmp_eq(rhs.cast_to_bits()) + } +} + impl BTBEntryAddrKind { #[hdl] fn taken(this: &SimValue) -> bool { @@ -410,28 +1883,52 @@ impl BTBEntryAddrKind { } } -#[hdl] -struct BTBEntry { - /// address of first instruction to run in this fetch block - start_pc: UInt<64>, +#[hdl(cmp_eq)] +struct BTBEntryWithoutStartPc { target_pc: UInt<64>, /// when branch is not taken, the next pc to fetch from is `start_pc + fallthrough_offset`. /// needed because there may be more than one branch in a fetch block fallthrough_offset: UInt<8>, + /// the pc to use for branch prediction is `start_pc + branch_offset` + branch_offset: UInt<8>, /// when a call is made, the return address is `start_pc + after_call_offset` after_call_offset: UInt<8>, insn_kind: BTBEntryInsnKind, addr_kind: BTBEntryAddrKind, } +#[hdl] +struct BTBEntry { + /// address of first instruction to run in this fetch block + start_pc: UInt<64>, + rest: BTBEntryWithoutStartPc, +} + impl BTBEntry { fn taken_pc(this: &SimValue) -> u64 { - this.target_pc.as_int() + this.rest.target_pc.as_int() } - fn not_taken_fetch_pc(this: &SimValue) -> u64 { + fn not_taken_start_pc(this: &SimValue) -> u64 { + Self::fallthrough_pc(this) + } + /// when branch is not taken, this returns the next pc to fetch from. + /// needed because there may be more than one branch in a fetch block + fn fallthrough_pc(this: &SimValue) -> u64 { this.start_pc .as_int() - .wrapping_add(this.fallthrough_offset.as_int().into()) + .wrapping_add(this.rest.fallthrough_offset.as_int().into()) + } + /// the pc to use for branch prediction + fn branch_pc(this: &SimValue) -> u64 { + this.start_pc + .as_int() + .wrapping_add(this.rest.branch_offset.as_int().into()) + } + /// when a call is made, this gives the return address + fn after_call_pc(this: &SimValue) -> u64 { + this.start_pc + .as_int() + .wrapping_add(this.rest.after_call_offset.as_int().into()) } } @@ -489,11 +1986,15 @@ impl SimValueDefault for BranchTargetBuffer { #[hdl(sim)] BTBEntry { start_pc: !0u64, - target_pc: !0u64, - fallthrough_offset: !0u8, - after_call_offset: !0u8, - insn_kind: BTBEntryInsnKind.Call(), - addr_kind: BTBEntryAddrKind.CondNotTaken(), + rest: #[hdl(sim)] + BTBEntryWithoutStartPc { + target_pc: !0u64, + fallthrough_offset: !0u8, + branch_offset: !0u8, + after_call_offset: !0u8, + insn_kind: BTBEntryInsnKind.Call(), + addr_kind: BTBEntryAddrKind.CondNotTaken(), + }, }, ); Self::SIZE], next_index_to_replace_lfsr: LFSR31.sim_value_default(), @@ -514,124 +2015,6 @@ impl ResetSteps for BranchTargetBuffer { } } -#[hdl] -struct BranchHistory { - history: Array, - /// exclusive - tail: UIntInRange<0, { BranchHistory::SIZE }>, - /// inclusive, always at or after tail, always at or before speculative_head - non_speculative_head: UIntInRange<0, { BranchHistory::SIZE }>, - /// inclusive, always at or after both tail and non_speculative_head - speculative_head: UIntInRange<0, { BranchHistory::SIZE }>, -} - -impl ResetSteps for BranchHistory { - #[hdl] - fn reset_step(this: &mut SimValue, step: usize) -> ResetStatus { - #[hdl(sim)] - let Self { - history, - tail, - non_speculative_head, - speculative_head, - } = this; - **tail = 0; - **non_speculative_head = 0; - **speculative_head = 0; - ResetSteps::reset_step(history, step) - } -} - -impl SimValueDefault for BranchHistory { - #[hdl] - fn sim_value_default(self) -> SimValue { - #[hdl(sim)] - BranchHistory { - // something other than zero so you can see the values getting reset - history: [true; Self::SIZE], - tail: 0usize.to_sim_value_with_type(self.tail), - non_speculative_head: 0usize.to_sim_value_with_type(self.non_speculative_head), - speculative_head: 0usize.to_sim_value_with_type(self.speculative_head), - } - } -} - -enum BranchHistoryTryPushSpeculativeError { - NoSpace, -} - -enum BranchHistoryTryPushNonSpeculativeError { - NoSpace, - Misprediction { speculated: bool }, -} - -impl BranchHistory { - const LOG2_SIZE: usize = 8; - const SIZE: usize = 1 << Self::LOG2_SIZE; - fn next_pos(pos: usize) -> usize { - (pos + 1) % Self::SIZE - } - fn prev_pos(pos: usize) -> usize { - (pos + Self::SIZE - 1) % Self::SIZE - } - fn history_from_head(this: &SimValue, head: usize) -> [bool; N] { - let mut retval = [false; N]; - let mut pos = head; - for entry in &mut retval { - if pos == *this.tail { - break; - } - *entry = *this.history[pos]; - pos = Self::prev_pos(pos); - } - retval - } - fn delete_speculative_history(this: &mut SimValue) { - let non_speculative_head = *this.non_speculative_head; - *this.speculative_head = non_speculative_head; - } - fn recent_history_including_speculative(this: &SimValue) -> [bool; N] { - let head = *this.speculative_head; - Self::history_from_head(this, head) - } - fn speculative_full(this: &SimValue) -> bool { - let speculative_head = *this.speculative_head; - Self::next_pos(speculative_head) == *this.tail - } - fn try_push_speculative( - this: &mut SimValue, - value: bool, - ) -> Result<(), BranchHistoryTryPushSpeculativeError> { - if Self::speculative_full(this) { - Err(BranchHistoryTryPushSpeculativeError::NoSpace) - } else { - let speculative_head = Self::next_pos(*this.speculative_head); - *this.speculative_head = speculative_head; - *this.history[speculative_head] = value; - Ok(()) - } - } - fn try_push_non_speculative( - this: &mut SimValue, - value: bool, - ) -> Result<(), BranchHistoryTryPushNonSpeculativeError> { - let speculative_head = *this.speculative_head; - let non_speculative_head = *this.non_speculative_head; - if speculative_head == non_speculative_head { - Err(BranchHistoryTryPushNonSpeculativeError::NoSpace) - } else { - let pos = Self::next_pos(non_speculative_head); - let speculated = *this.history[pos]; - if speculated != value { - Err(BranchHistoryTryPushNonSpeculativeError::Misprediction { speculated }) - } else { - *this.non_speculative_head = pos; - Ok(()) - } - } - } -} - #[hdl] struct Queue { data: ArrayType, @@ -756,410 +2139,102 @@ impl SimValueDefault for FetchQueueEntry { const BRANCH_PREDICTOR_LOG2_SIZE: usize = 8; const BRANCH_PREDICTOR_SIZE: usize = 1 << BRANCH_PREDICTOR_LOG2_SIZE; -#[hdl] +#[hdl(no_static)] pub struct NextPcState> { - speculative_call_stack: CallStack, - non_speculative_call_stack: CallStack, - branch_target_buffer: BranchTargetBuffer, - branch_history: BranchHistory, - branch_predictor: Array, - fetching_queue: Queue>, - cancel_in_progress_fetches: Bool, - pc: UInt<64>, - fetch_block_id: UInt<{ FETCH_BLOCK_ID_WIDTH }>, + next_pc_stage_state: NextPcStageState, + next_pc_stage_outputs: Queue, ConstUsize<1>>, + br_pred_stage_state: BrPredStageState, + br_pred_stage_outputs: Queue, ConstUsize<32>>, + fetch_decode_stage_state: FetchDecodeStageState, + fetch_decode_stage_outputs: Queue, ConstUsize<32>>, + post_decode_stage_state: PostDecodeStageState, + post_decode_stage_outputs: Queue, ConstUsize<1>>, + rename_dispatch_execute_stage_state: RenameDispatchExecuteStageState, + rename_dispatch_execute_stage_outputs: + Queue, ConstUsize<256>>, + retire_stage_state: RetireStageState, config: C, } -impl> NextPcState { - fn branch_predictor_index(this: &SimValue, pc: u64) -> usize { - let mut history = 0u64; - let history_bits: [bool; BRANCH_PREDICTOR_LOG2_SIZE] = - BranchHistory::recent_history_including_speculative(&this.branch_history); - for history_bit in history_bits { - history <<= 1; - if history_bit { - history |= 1; - } - } - let mut t = history; - t ^= t.rotate_left(5) & !pc.rotate_right(3); - t ^= pc; - t ^= !t.rotate_left(2) & t.rotate_left(4); - let mut retval = 0; - for i in (0..BRANCH_PREDICTOR_LOG2_SIZE).step_by(BRANCH_PREDICTOR_LOG2_SIZE) { - retval ^= t >> i; - } - retval as usize % BRANCH_PREDICTOR_SIZE - } - #[hdl] - fn handle_to_fetch( - mut this: SimValue, - ) -> ( - SimValue, - SimValue>, - ) { - let in_progress_fetches_to_cancel = if *this.cancel_in_progress_fetches { - let old_len = Queue::len(&this.fetching_queue); - if old_len == 0 { - // did a full cycle of cancelling - *this.cancel_in_progress_fetches = false; - } else { - Queue::clear(&mut this.fetching_queue); - } - old_len.cast_to_static::>() - } else if Queue::is_full(&this.fetching_queue) { - return ( - this, - #[hdl(sim)] - HdlNone(), - ); - } else { - 0u8.cast_to_static::>() - }; - - let fetch_block_id: FetchBlockIdInt = this.fetch_block_id.as_int(); - *this.fetch_block_id = fetch_block_id.wrapping_add(1).into(); - - let next_fetch_pc = - this.pc.as_int() & (!0u64 << this.config.ty().get().log2_fetch_width_in_bytes); - - let (found_btb_entry_index, found_btb_entry) = this - .branch_target_buffer - .branch_pc_to_target_map - .iter() - .enumerate() - .filter_map(|(index, entry)| { - #[hdl(sim)] - match entry { - HdlNone => None, - HdlSome(entry) => Some((index, entry)), - } - }) - .find(|(_, entry)| entry.start_pc == this.pc) - .unzip(); - let found_btb_entry = found_btb_entry.cloned(); - // TODO: handle instructions not aligned with fetch blocks - let new_pc = if let Some(entry) = &found_btb_entry { - if BTBEntryAddrKind::taken(&entry.addr_kind) { - BTBEntry::taken_pc(entry) - } else { - BTBEntry::not_taken_fetch_pc(entry) - } - } else { - next_fetch_pc.wrapping_add(this.config.ty().get().fetch_width_in_bytes() as u64) - }; - Queue::try_push( - &mut this.fetching_queue, - #[hdl(sim)] - FetchQueueEntry { - fetch_block_id, - btb_entry: found_btb_entry - .map(|entry| { - #[hdl(sim)] - HdlSome(entry) - }) - .unwrap_or_else(|| { - #[hdl(sim)] - HdlNone() - }), - btb_entry_index: found_btb_entry_index - .unwrap_or(0) - .to_sim_value_with_type(FetchQueueEntry.btb_entry_index), - next_pc: new_pc, - }, - ) - .expect("checked is_full above"); - *this.pc = new_pc.into(); - ( - this, - #[hdl(sim)] - HdlSome( - #[hdl(sim)] - NextPcToFetchInterfaceInner { - next_fetch_pc, - fetch_block_id, - in_progress_fetches_to_cancel, - }, - ), - ) - } - #[hdl] - fn handle_from_decode( - this: &mut SimValue, - from_decode: SimValue>, - ) { - #[hdl(sim)] - let DecodeToPostDecodeInterfaceInner::<_> { insns, config } = from_decode; - assert_eq!(this.config.ty(), config.ty()); - assert_ne!( - **ArrayVec::len_sim(&insns), - 0, - "fetch/decode must always return at least one instruction \ - -- either the decoded instructions or a WipDecodedInsnKind::Interrupt", - ); - let Some(fetch_queue_entry) = Queue::pop(&mut this.fetching_queue) else { - unreachable!(); - }; - #[hdl(sim)] - let FetchQueueEntry { - fetch_block_id: expected_fetch_block_id, - btb_entry, - btb_entry_index, - next_pc: orig_next_pc, - } = fetch_queue_entry; - let insns = ArrayVec::elements_sim_ref(&insns); - if let Some(target_pc) = WipDecodedInsnKind::interrupt_target_pc_sim(&insns[0].kind) { - if *target_pc != orig_next_pc { - *this.cancel_in_progress_fetches = true; - this.pc = target_pc.clone(); - } - return; - } - let start_pc = insns[0].pc.as_int(); - let mut fallthrough_offset = 0u8; - let mut after_call_offset = 0u8; - let mut btb_entry_fields = None; - let mut eval_cond_branch = || -> SimValue { - todo!(); - }; - for insn in insns { - #[hdl(sim)] - let WipDecodedInsn { - fetch_block_id, - id, - pc, - size_in_bytes, - kind, - } = insn; - assert_eq!( - *fetch_block_id, expected_fetch_block_id, - "fetch decode pipeline's output isn't in-sync with fetching_queue", - ); - let insn_kind; - let addr_kind; - let target_pc = #[hdl(sim)] - match kind { - WipDecodedInsnKind::NonBranch => continue, - WipDecodedInsnKind::Branch(target_pc) => { - insn_kind = #[hdl(sim)] - BTBEntryInsnKind::Branch(); - addr_kind = #[hdl(sim)] - BTBEntryAddrKind::Unconditional(); - Some(target_pc.as_int()) - } - WipDecodedInsnKind::BranchCond(target_pc) => { - insn_kind = #[hdl(sim)] - BTBEntryInsnKind::Branch(); - addr_kind = eval_cond_branch(); - Some(target_pc.as_int()) - } - WipDecodedInsnKind::IndirectBranch => { - insn_kind = #[hdl(sim)] - BTBEntryInsnKind::Branch(); - addr_kind = #[hdl(sim)] - BTBEntryAddrKind::Indirect(); - None - } - WipDecodedInsnKind::Call(target_pc) => { - insn_kind = #[hdl(sim)] - BTBEntryInsnKind::Call(); - addr_kind = #[hdl(sim)] - BTBEntryAddrKind::Unconditional(); - Some(target_pc.as_int()) - } - WipDecodedInsnKind::CallCond(target_pc) => { - insn_kind = #[hdl(sim)] - BTBEntryInsnKind::Call(); - addr_kind = eval_cond_branch(); - Some(target_pc.as_int()) - } - WipDecodedInsnKind::IndirectCall => { - insn_kind = #[hdl(sim)] - BTBEntryInsnKind::Call(); - addr_kind = #[hdl(sim)] - BTBEntryAddrKind::Indirect(); - None - } - WipDecodedInsnKind::Ret => { - insn_kind = #[hdl(sim)] - BTBEntryInsnKind::Ret(); - addr_kind = #[hdl(sim)] - BTBEntryAddrKind::Unconditional(); - None - } - WipDecodedInsnKind::RetCond => { - insn_kind = #[hdl(sim)] - BTBEntryInsnKind::Ret(); - addr_kind = eval_cond_branch(); - None - } - WipDecodedInsnKind::Interrupt(_) => { - // interrupt after other instructions, just truncate the fetch block before the interrupt - break; - } - WipDecodedInsnKind::Unknown => unreachable!(), - }; - - // all branches/calls/returns end up here - - if btb_entry_fields.is_some() { - // TODO: maybe implement handling multiple ctrl transfer insns in the same fetch block, - // for now we just truncate the fetch block right before the second ctrl transfer insn. - break; - } - btb_entry_fields = Some((insn_kind, addr_kind, target_pc)); - fallthrough_offset += size_in_bytes.cast_to_static::>().as_int(); - #[hdl(sim)] - match insn_kind { - BTBEntryInsnKind::Call => after_call_offset = fallthrough_offset, - BTBEntryInsnKind::Branch | BTBEntryInsnKind::Ret | BTBEntryInsnKind::Unknown => {} - } - } - let new_next_pc = if let Some((insn_kind, addr_kind, mut target_pc)) = btb_entry_fields { - // add/update BTBEntry if it doesn't match - let btb_entry_index = #[hdl(sim)] - if let HdlSome(btb_entry) = btb_entry { - // verify it hasn't been changed meanwhile - #[hdl(sim)] - if let HdlSome(entry) = - &this.branch_target_buffer.branch_pc_to_target_map[*btb_entry_index] - { - // we have a btb entry, check if it has been modified - if entry.start_pc == btb_entry.start_pc { - // we found the correct BTBEntry - if target_pc.is_none() { - // save the existing target_pc if we know it - target_pc = Some(entry.target_pc.as_int()); - } - Some(*btb_entry_index) - } else { - None - } - } else { - None - } - } else { - None - }; - let btb_entry_index = btb_entry_index.unwrap_or_else(|| { - // we need to add a new entry, pick an entry to replace - BranchTargetBuffer::next_index_to_replace(&mut this.branch_target_buffer) - }); - let new_next_pc = #[hdl(sim)] - match insn_kind { - BTBEntryInsnKind::Branch => {} - BTBEntryInsnKind::Call => { - CallStack::push(&mut this.speculative_call_stack, todo!()); - todo!() - } - BTBEntryInsnKind::Ret => { - target_pc = CallStack::pop(&mut this.speculative_call_stack).or(target_pc); - } - }; - let new_entry = #[hdl(sim)] - BTBEntry { - start_pc, - target_pc: target_pc.unwrap_or(0u64), - fallthrough_offset, - after_call_offset, - insn_kind, - addr_kind, - }; - let entry_mut = &mut this.branch_target_buffer.branch_pc_to_target_map[btb_entry_index]; - *entry_mut = #[hdl(sim)] - HdlSome(new_entry); - new_next_pc - } else { - #[hdl(sim)] - if let HdlSome(btb_entry) = btb_entry { - // the fetched instructions do not need a BTBEntry, remove the BTBEntry if it still exists - let entry_mut = - &mut this.branch_target_buffer.branch_pc_to_target_map[*btb_entry_index]; - // verify it hasn't been changed meanwhile - #[hdl(sim)] - if let HdlSome(entry) = &entry_mut { - if entry.start_pc == btb_entry.start_pc { - *entry_mut = #[hdl(sim)] - HdlNone(); - } - } - } - start_pc + u64::from(fallthrough_offset) - }; - if new_next_pc != orig_next_pc.as_int() { - *this.cancel_in_progress_fetches = true; - *this.pc = new_next_pc.into(); - } - } -} - impl SimValueDefault for NextPcState> { #[hdl] fn sim_value_default(self) -> SimValue { let Self { - speculative_call_stack, - non_speculative_call_stack, - branch_target_buffer, - branch_history, - branch_predictor: _, - fetching_queue, - cancel_in_progress_fetches: _, - pc: _, - fetch_block_id: _, + next_pc_stage_state, + next_pc_stage_outputs, + br_pred_stage_state, + br_pred_stage_outputs, + fetch_decode_stage_state, + fetch_decode_stage_outputs, + post_decode_stage_state, + post_decode_stage_outputs, + rename_dispatch_execute_stage_state, + rename_dispatch_execute_stage_outputs, + retire_stage_state, config, } = self; #[hdl(sim)] Self { - speculative_call_stack: speculative_call_stack.sim_value_default(), - non_speculative_call_stack: non_speculative_call_stack.sim_value_default(), - branch_target_buffer: branch_target_buffer.sim_value_default(), - branch_history: branch_history.sim_value_default(), - // use something other than the default so you can see the reset progress - branch_predictor: std::array::from_fn(|_| { - BranchPredictionState::towards_not_taken(&BranchPredictionState.sim_value_default()) - }), - fetching_queue: fetching_queue.sim_value_default(), - cancel_in_progress_fetches: false, - // use something other than the default so you can see the reset progress - pc: !0u64, - // use something other than the default so you can see the reset progress - fetch_block_id: !0u8, + next_pc_stage_state: next_pc_stage_state.sim_value_default(), + next_pc_stage_outputs: next_pc_stage_outputs.sim_value_default(), + br_pred_stage_state: br_pred_stage_state.sim_value_default(), + br_pred_stage_outputs: br_pred_stage_outputs.sim_value_default(), + fetch_decode_stage_state: fetch_decode_stage_state.sim_value_default(), + fetch_decode_stage_outputs: fetch_decode_stage_outputs.sim_value_default(), + post_decode_stage_state: post_decode_stage_state.sim_value_default(), + post_decode_stage_outputs: post_decode_stage_outputs.sim_value_default(), + rename_dispatch_execute_stage_state: rename_dispatch_execute_stage_state + .sim_value_default(), + rename_dispatch_execute_stage_outputs: rename_dispatch_execute_stage_outputs + .sim_value_default(), + retire_stage_state: retire_stage_state.sim_value_default(), config, } } } -impl> ResetSteps for NextPcState { +impl ResetSteps for NextPcState> { #[hdl] fn reset_step(this: &mut SimValue, step: usize) -> ResetStatus { #[hdl(sim)] - let NextPcState:: { - speculative_call_stack, - non_speculative_call_stack, - branch_target_buffer, - branch_history, - branch_predictor, - fetching_queue, - cancel_in_progress_fetches, - pc, - fetch_block_id, + let NextPcState::<_> { + next_pc_stage_state, + next_pc_stage_outputs, + br_pred_stage_state, + br_pred_stage_outputs, + fetch_decode_stage_state, + fetch_decode_stage_outputs, + post_decode_stage_state, + post_decode_stage_outputs, + rename_dispatch_execute_stage_state, + rename_dispatch_execute_stage_outputs, + retire_stage_state, config: _, } = this; - **cancel_in_progress_fetches = false; - **pc = 0u64.into(); // match Microwatt's reset PC - **fetch_block_id = 0u8.into(); - let speculative_call_stack = ResetSteps::reset_step(speculative_call_stack, step); - let non_speculative_call_stack = ResetSteps::reset_step(non_speculative_call_stack, step); - let branch_target_buffer = ResetSteps::reset_step(branch_target_buffer, step); - let branch_history = ResetSteps::reset_step(branch_history, step); - let branch_predictor = ResetSteps::reset_step(branch_predictor, step); - let fetching_queue = ResetSteps::reset_step(fetching_queue, step); - speculative_call_stack - .and(non_speculative_call_stack) - .and(branch_target_buffer) - .and(branch_history) - .and(branch_predictor) - .and(fetching_queue) + let next_pc_stage_state = ResetSteps::reset_step(next_pc_stage_state, step); + let next_pc_stage_outputs = ResetSteps::reset_step(next_pc_stage_outputs, step); + let br_pred_stage_state = ResetSteps::reset_step(br_pred_stage_state, step); + let br_pred_stage_outputs = ResetSteps::reset_step(br_pred_stage_outputs, step); + let fetch_decode_stage_state = ResetSteps::reset_step(fetch_decode_stage_state, step); + let fetch_decode_stage_outputs = ResetSteps::reset_step(fetch_decode_stage_outputs, step); + let post_decode_stage_state = ResetSteps::reset_step(post_decode_stage_state, step); + let post_decode_stage_outputs = ResetSteps::reset_step(post_decode_stage_outputs, step); + let rename_dispatch_execute_stage_state = + ResetSteps::reset_step(rename_dispatch_execute_stage_state, step); + let rename_dispatch_execute_stage_outputs = + ResetSteps::reset_step(rename_dispatch_execute_stage_outputs, step); + let retire_stage_state = ResetSteps::reset_step(retire_stage_state, step); + next_pc_stage_state + .and(next_pc_stage_outputs) + .and(br_pred_stage_state) + .and(br_pred_stage_outputs) + .and(fetch_decode_stage_state) + .and(fetch_decode_stage_outputs) + .and(post_decode_stage_state) + .and(post_decode_stage_outputs) + .and(rename_dispatch_execute_stage_state) + .and(rename_dispatch_execute_stage_outputs) + .and(retire_stage_state) } } @@ -1174,6 +2249,12 @@ pub fn next_pc(config: PhantomConst) { let from_decode: DecodeToPostDecodeInterface> = m.input(DecodeToPostDecodeInterface[config]); #[hdl] + let post_decode_output: PostDecodeOutputInterface> = + m.input(PostDecodeOutputInterface[config]); + #[hdl] + let from_retire: RetireToNextPcInterface> = + m.input(RetireToNextPcInterface[config]); + #[hdl] let state_for_debug: NextPcState> = m.output(NextPcState[config]); m.register_clock_for_past(cd.clk); #[hdl] @@ -1182,6 +2263,8 @@ pub fn next_pc(config: PhantomConst) { cd: Expr, to_fetch: Expr>>, from_decode: Expr>>, + post_decode_output: Expr>>, + from_retire: Expr>>, state_expr: Expr>>, ) { let mut state = sim.read(state_expr).await; @@ -1196,49 +2279,43 @@ pub fn next_pc(config: PhantomConst) { } } loop { - sim.write( - from_decode.inner.ready, - !Queue::is_empty(&state.fetching_queue), - ) - .await; - - // ignore current stuff from decode when cancelling - if !*state.cancel_in_progress_fetches - && sim.read_past_bool(from_decode.inner.ready, cd.clk).await - { - #[hdl(sim)] - if let HdlSome(from_decode_data) = - sim.read_past(from_decode.inner.data, cd.clk).await - { - NextPcState::handle_from_decode(&mut state, from_decode_data); - } - } - - let to_fetch_data; - if sim.read_past_bool(to_fetch.inner.ready, cd.clk).await { - (state, to_fetch_data) = NextPcState::handle_to_fetch(state); - } else { - (_, to_fetch_data) = NextPcState::handle_to_fetch(state.clone()); - }; - sim.write(to_fetch.inner.data, to_fetch_data).await; - + todo!(); sim.write(state_expr, state).await; sim.wait_for_clock_edge(cd.clk).await; state = sim.read_past(state_expr, cd.clk).await; } } m.extern_module_simulation_fn( - (cd, to_fetch, from_decode, state_for_debug), - |(cd, to_fetch, from_decode, state_for_debug), mut sim| async move { + ( + cd, + to_fetch, + from_decode, + post_decode_output, + from_retire, + state_for_debug, + ), + |args, mut sim| async move { + let (cd, to_fetch, from_decode, post_decode_output, from_retire, state_for_debug) = + args; sim.write(state_for_debug, state_for_debug.ty().sim_value_default()) .await; sim.resettable( cd, - async |mut sim: ExternModuleSimulationState| { + |mut sim: ExternModuleSimulationState| async move { sim.write(to_fetch.inner.data, HdlNone()).await; sim.write(from_decode.inner.ready, false).await; }, - |sim, ()| run(sim, cd, to_fetch, from_decode, state_for_debug), + |sim, ()| { + run( + sim, + cd, + to_fetch, + from_decode, + post_decode_output, + from_retire, + state_for_debug, + ) + }, ) .await; }, diff --git a/crates/cpu/src/next_pc/next_pc.mermaid b/crates/cpu/src/next_pc/next_pc.mermaid new file mode 100644 index 0000000..05ac31c --- /dev/null +++ b/crates/cpu/src/next_pc/next_pc.mermaid @@ -0,0 +1,25 @@ +stateDiagram-v2 + direction LR + + state "Next PC" as next_pc + [*] --> next_pc + + state "Fetch/Decode" as fetch_decode + next_pc --> fetch_decode + + state "Branch Predictor" as br_pred + next_pc --> br_pred + br_pred --> next_pc: cancel following + + state "Post-decode" as post_decode + fetch_decode --> post_decode + br_pred --> post_decode + post_decode --> next_pc: cancel following + + state "Rename\nDispatch\nExecute" as execute + post_decode --> execute + + state "Retire" as retire + execute --> retire + retire --> [*] + retire --> next_pc: cancel following \ No newline at end of file diff --git a/crates/cpu/src/reg_alloc.rs b/crates/cpu/src/reg_alloc.rs index c84ba6f..13fc8b3 100644 --- a/crates/cpu/src/reg_alloc.rs +++ b/crates/cpu/src/reg_alloc.rs @@ -578,7 +578,8 @@ pub fn reg_alloc(config: &CpuConfig) { connect(unit_to_reg_alloc.unit_forwarding_info, unit_forwarding_info); connect( unit_forwarding_info.unit_output_writes[unit_index], - unit_forwarding_info.ty() + unit_forwarding_info + .ty() .unit_output_writes .element() .HdlNone(), diff --git a/crates/cpu/src/unit/alu_branch.rs b/crates/cpu/src/unit/alu_branch.rs index ff0ba1a..8f20592 100644 --- a/crates/cpu/src/unit/alu_branch.rs +++ b/crates/cpu/src/unit/alu_branch.rs @@ -272,10 +272,7 @@ pub fn alu_branch(config: &CpuConfig, unit_index: usize) { connect(unit_to_reg_alloc, unit_base.unit_to_reg_alloc); connect(unit_base.cd, cd); connect(unit_base.execute_start.ready, true); - connect( - unit_base.execute_end, - unit_base.execute_end.ty().HdlNone(), - ); + connect(unit_base.execute_end, unit_base.execute_end.ty().HdlNone()); #[hdl] if let HdlSome(execute_start) = ReadyValid::firing_data(unit_base.execute_start) { #[hdl] diff --git a/crates/cpu/src/util/array_vec.rs b/crates/cpu/src/util/array_vec.rs index 16be553..b28b029 100644 --- a/crates/cpu/src/util/array_vec.rs +++ b/crates/cpu/src/util/array_vec.rs @@ -34,6 +34,18 @@ impl ArrayVec { len: 0u8.cast_to(self.len), } } + #[hdl] + pub fn new_full_sim( + self, + elements: impl ToSimValueWithType>, + ) -> SimValue { + let elements = elements.to_sim_value_with_type(self.elements); + #[hdl(sim)] + Self { + elements, + len: self.elements.len().to_sim_value_with_type(self.len), + } + } pub fn element(self) -> T { self.elements.element() }