diff --git a/Cargo.lock b/Cargo.lock index 6a0911e..90538a8 100644 --- a/Cargo.lock +++ b/Cargo.lock @@ -211,6 +211,7 @@ version = "0.1.0" dependencies = [ "fayalite", "serde", + "simple-mermaid", ] [[package]] @@ -690,6 +691,12 @@ version = "1.3.0" source = "registry+https://github.com/rust-lang/crates.io-index" checksum = "0fda2ff0d084019ba4d7c6f371c95d8fd75ce3524c3cb8fb653a3023f6323e64" +[[package]] +name = "simple-mermaid" +version = "0.2.0" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "589144a964b4b30fe3a83b4bb1a09e2475aac194ec832a046a23e75bddf9eb29" + [[package]] name = "strsim" version = "0.11.1" diff --git a/Cargo.toml b/Cargo.toml index a3e74f0..00f2e67 100644 --- a/Cargo.toml +++ b/Cargo.toml @@ -16,6 +16,7 @@ rust-version = "1.89.0" [workspace.dependencies] fayalite = { git = "https://git.libre-chip.org/libre-chip/fayalite.git", version = "0.3.0", branch = "master" } serde = { version = "1.0.202", features = ["derive"] } +simple-mermaid = "0.2.0" [profile.dev] opt-level = 1 diff --git a/crates/cpu/Cargo.toml b/crates/cpu/Cargo.toml index 4dd85d8..2f5f84c 100644 --- a/crates/cpu/Cargo.toml +++ b/crates/cpu/Cargo.toml @@ -17,3 +17,4 @@ version.workspace = true [dependencies] fayalite.workspace = true serde.workspace = true +simple-mermaid.workspace = true diff --git a/crates/cpu/src/next_pc.rs b/crates/cpu/src/next_pc.rs index 7c62d72..9c0c6bb 100644 --- a/crates/cpu/src/next_pc.rs +++ b/crates/cpu/src/next_pc.rs @@ -8,17 +8,21 @@ //! after the `decode` stage there's a `post_decode` stage (that may run in the same clock cycle as `decode`) //! that checks that the fetched instructions' kinds match the predicted instruction kinds and that feeds //! information back to the `fetch` stage to cancel fetches that need to be predicted differently. +//! 
+#![doc = simple_mermaid::mermaid!("next_pc/next_pc.mermaid")] use crate::{ config::{CpuConfig, CpuConfigFetchWidth}, util::array_vec::ArrayVec, }; use fayalite::{ + expr::HdlPartialEqImpl, int::{UIntInRange, UIntInRangeInclusive, UIntInRangeType}, prelude::*, sim::value::SimOnlyValueTrait, util::ready_valid::ReadyValid, }; +use std::borrow::Cow; #[hdl] pub enum PredictedCond { @@ -142,6 +146,939 @@ pub struct PostDecodeOutputInterface> { pub config: C, } +#[hdl] +struct TrainBranchPredictor { + branch_predictor_index: UIntInRange<0, { BRANCH_PREDICTOR_SIZE }>, + taken: Bool, +} + +#[hdl(no_static)] +struct Cancel> { + call_stack: CallStack, + start_pc: UInt<64>, + new_btb_entry: HdlOption, + btb_entry_index: HdlOption>, + train_branch_predictor: HdlOption, + branch_history: UInt<6>, + config: C, +} + +/// the output of `Stage::run`. +/// when cancelling operations, the returned [`StageOutput::cancel`] should be the state after running all operations returned in [`StageOutput::outputs`] +#[hdl(no_static)] +struct StageOutput> { + outputs: ArrayVec, + cancel: HdlOption>, +} + +trait Stage: Type + SimValueDefault + ResetSteps { + type Inputs: Type; + type Output: Type; + type MaxOutputCount: Size; + + fn output_ty(config: PhantomConst) -> Self::Output; + fn max_output_count( + config: PhantomConst, + ) -> ::SizeType; + fn stage_output_ty( + config: PhantomConst, + ) -> StageOutput> { + StageOutput[Self::output_ty(config)][Self::max_output_count(config)][config] + } + fn run( + state: &mut SimValue, + inputs: &SimValue, + ) -> SimValue>>; + /// changes state to match `cancel` + fn cancel(state: &mut SimValue, cancel: &SimValue>>); +} + +#[hdl(no_static)] +struct NextPcStageOutput> { + start_pc: UInt<64>, + next_start_pc: UInt<64>, + btb_entry: HdlOption<( + UIntInRange<0, { BranchTargetBuffer::SIZE }>, + BTBEntryWithoutStartPc, + )>, + fetch_block_id: UInt<{ FETCH_BLOCK_ID_WIDTH }>, + start_call_stack: CallStack, + config: C, +} + +#[hdl(no_static)] +struct 
NextPcStageState> { + call_stack: CallStack, + branch_target_buffer: BranchTargetBuffer, + next_pc: UInt<64>, + next_fetch_block_id: UInt<{ FETCH_BLOCK_ID_WIDTH }>, + config: C, +} + +impl SimValueDefault for NextPcStageState> { + #[hdl] + fn sim_value_default(self) -> SimValue { + let Self { + call_stack, + branch_target_buffer, + next_pc: _, + next_fetch_block_id: _, + config, + } = self; + #[hdl(sim)] + Self { + call_stack: call_stack.sim_value_default(), + branch_target_buffer: branch_target_buffer.sim_value_default(), + // use something other than the default so you can see the reset progress + next_pc: !0u64, + // use something other than the default so you can see the reset progress + next_fetch_block_id: !0u8, + config, + } + } +} + +impl ResetSteps for NextPcStageState> { + #[hdl] + fn reset_step(this: &mut SimValue, step: usize) -> ResetStatus { + #[hdl(sim)] + let Self { + call_stack, + branch_target_buffer, + next_pc, + next_fetch_block_id, + config: _, + } = this; + **next_pc = 0u64.into(); // match Microwatt's reset PC + **next_fetch_block_id = 0u8.into(); + let call_stack = ResetSteps::reset_step(call_stack, step); + let branch_target_buffer = ResetSteps::reset_step(branch_target_buffer, step); + call_stack.and(branch_target_buffer) + } +} + +impl Stage for NextPcStageState> { + type Inputs = (); + type Output = NextPcStageOutput>; + type MaxOutputCount = ConstUsize<1>; + + fn output_ty(config: PhantomConst) -> Self::Output { + NextPcStageOutput[config] + } + + fn max_output_count( + _config: PhantomConst, + ) -> ::SizeType { + ConstUsize + } + + #[hdl] + fn run( + state: &mut SimValue, + _inputs: &SimValue, + ) -> SimValue>> { + let config = state.config.ty(); + let start_call_stack = state.call_stack.clone(); + let fetch_block_id = state.next_fetch_block_id.as_int(); + *state.next_fetch_block_id = state.next_fetch_block_id.as_int().wrapping_add(1).into(); + let start_pc = state.next_pc.as_int(); + let fetch_pc = start_pc & (!0u64 << 
config.get().log2_fetch_width_in_bytes); + + let btb_entry_index = state + .branch_target_buffer + .branch_pc_to_target_map + .iter() + .position(|entry| { + #[hdl(sim)] + match entry { + HdlNone => false, + HdlSome(entry) => entry.start_pc.as_int() == start_pc, + } + }); + let (next_start_pc, btb_entry) = if let Some(btb_entry_index) = btb_entry_index { + #[hdl(sim)] + let Self { + call_stack, + branch_target_buffer, + .. + } = state; + let entry = #[hdl(sim)] + match &branch_target_buffer.branch_pc_to_target_map[btb_entry_index] { + HdlSome(entry) => entry, + _ => unreachable!(), + }; + let next_start_pc = #[hdl(sim)] + match &entry.rest.insn_kind { + BTBEntryInsnKind::Branch => { + if BTBEntryAddrKind::taken(&entry.rest.addr_kind) { + BTBEntry::taken_pc(entry) + } else { + BTBEntry::not_taken_start_pc(entry) + } + } + BTBEntryInsnKind::Call => { + if BTBEntryAddrKind::taken(&entry.rest.addr_kind) { + CallStack::push(call_stack, BTBEntry::after_call_pc(entry)); + BTBEntry::taken_pc(entry) + } else { + BTBEntry::not_taken_start_pc(entry) + } + } + BTBEntryInsnKind::Ret => { + if BTBEntryAddrKind::taken(&entry.rest.addr_kind) { + CallStack::pop(call_stack).unwrap_or(BTBEntry::taken_pc(entry)) + } else { + BTBEntry::not_taken_start_pc(entry) + } + } + BTBEntryInsnKind::Unknown => unreachable!(), + }; + ( + next_start_pc, + #[hdl(sim)] + HdlSome((btb_entry_index, &entry.rest)), + ) + } else { + ( + fetch_pc.wrapping_add(config.get().fetch_width_in_bytes() as u64), + #[hdl(sim)] + HdlNone(), + ) + }; + let output = #[hdl(sim)] + NextPcStageOutput::<_> { + start_pc, + next_start_pc, + btb_entry, + fetch_block_id, + start_call_stack, + config, + }; + #[hdl(sim)] + StageOutput::<_, _, _> { + outputs: Self::stage_output_ty(config).outputs.new_full_sim([output]), + cancel: #[hdl(sim)] + (HdlOption[Cancel[config]]).HdlNone(), + } + } + + #[hdl] + fn cancel(state: &mut SimValue, cancel: &SimValue>>) { + #[hdl(sim)] + let Self { + call_stack, + branch_target_buffer, + 
next_pc, + next_fetch_block_id: _, + config: _, + } = state; + #[hdl(sim)] + let Cancel::<_> { + call_stack: new_call_stack, + start_pc, + new_btb_entry, + btb_entry_index, + train_branch_predictor: _, + branch_history: _, + config: _, + } = cancel; + call_stack.clone_from(new_call_stack); + next_pc.clone_from(start_pc); + #[hdl(sim)] + if let HdlSome(new_btb_entry) = new_btb_entry { + // add/update btb entry + + // get old entry if it's still there + let btb_entry_index = #[hdl(sim)] + if let HdlSome(btb_entry_index) = btb_entry_index { + #[hdl(sim)] + if let HdlSome(entry) = + &branch_target_buffer.branch_pc_to_target_map[**btb_entry_index] + { + if entry.start_pc == *start_pc { + // found the old entry + Some(**btb_entry_index) + } else { + None + } + } else { + None + } + } else { + None + }; + + let btb_entry_index = btb_entry_index.unwrap_or_else(|| { + // old entry isn't there, pick an entry to replace + BranchTargetBuffer::next_index_to_replace(branch_target_buffer) + }); + + // replace with new entry + branch_target_buffer.branch_pc_to_target_map[btb_entry_index] = #[hdl(sim)] + HdlSome( + #[hdl(sim)] + BTBEntry { + start_pc, + rest: new_btb_entry, + }, + ); + } else if let HdlSome(btb_entry_index) = btb_entry_index { + // remove btb entry if it's still there + let entry_mut = &mut branch_target_buffer.branch_pc_to_target_map[**btb_entry_index]; + #[hdl(sim)] + if let HdlSome(entry) = &entry_mut { + if entry.start_pc == *start_pc { + // we found it, remove it + *entry_mut = #[hdl(sim)] + HdlNone(); + } + } + } + } +} + +#[hdl(no_static)] +struct BrPredStageOutput> { + start_branch_history: UInt<6>, + train_branch_predictor_for_misprediction: HdlOption, + config: C, +} + +impl SimValueDefault for BrPredStageOutput> { + #[hdl] + fn sim_value_default(self) -> SimValue { + #[hdl(sim)] + Self { + start_branch_history: self.start_branch_history.zero(), + train_branch_predictor_for_misprediction: #[hdl(sim)] + HdlNone(), + config: self.config, + } + } +} + 
+#[hdl(no_static)] +struct BrPredStageState> { + branch_history: UInt<6>, + branch_predictor: Array, + config: C, +} + +impl BrPredStageState> { + fn branch_predictor_index(this: &SimValue, branch_pc: u64) -> usize { + let mut t = this.branch_history.cast_to_static::>().as_int(); + t ^= t.rotate_left(5) & !branch_pc.rotate_right(3); + t ^= branch_pc; + t ^= !t.rotate_left(2) & t.rotate_left(4); + let mut retval = 0; + for i in (0..u64::BITS as usize).step_by(BRANCH_PREDICTOR_LOG2_SIZE) { // XOR-fold every chunk of the 64-bit hash, not just the lowest one + retval ^= t >> i; + } + retval as usize % BRANCH_PREDICTOR_SIZE + } +} + +impl SimValueDefault for BrPredStageState> { + #[hdl] + fn sim_value_default(self) -> SimValue { + let Self { + branch_history: _, + branch_predictor: _, + config, + } = self; + #[hdl(sim)] + Self { + // use something other than the default so you can see the reset progress + branch_history: (-1i8).cast_to_static::>(), + // use something other than the default so you can see the reset progress + branch_predictor: std::array::from_fn(|_| { + BranchPredictionState::towards_not_taken(&BranchPredictionState.sim_value_default()) + }), + config, + } + } +} + +impl ResetSteps for BrPredStageState> { + #[hdl] + fn reset_step(this: &mut SimValue, step: usize) -> ResetStatus { + #[hdl(sim)] + let Self { + branch_history, + branch_predictor, + config: _, + } = this; + **branch_history = 0u8.cast_to_static::>(); + ResetSteps::reset_step(branch_predictor, step) + } +} + +impl Stage for BrPredStageState> { + type Inputs = NextPcStageOutput>; + type Output = BrPredStageOutput>; + type MaxOutputCount = ConstUsize<1>; + + fn output_ty(config: PhantomConst) -> Self::Output { + BrPredStageOutput[config] + } + + fn max_output_count( + _config: PhantomConst, + ) -> ::SizeType { + ConstUsize + } + + #[hdl] + fn run( + state: &mut SimValue, + inputs: &SimValue, + ) -> SimValue>> { + let config = state.config.ty(); + #[hdl(sim)] + let NextPcStageOutput::<_> { + start_pc, + next_start_pc: _, + btb_entry, + fetch_block_id: 
_, + start_call_stack, + config: _, + } = inputs; + let start_branch_history = state.branch_history.clone(); + let mut train_branch_predictor_for_misprediction = #[hdl(sim)] + HdlNone(); + #[hdl(sim)] + if let HdlSome(btb_entry) = btb_entry { + let taken_and_opposite_addr_kind = #[hdl(sim)] + match &btb_entry.1.addr_kind { + BTBEntryAddrKind::Unconditional | BTBEntryAddrKind::Indirect => None, + BTBEntryAddrKind::CondTaken => Some(( + true, + #[hdl(sim)] + BTBEntryAddrKind::CondNotTaken(), + )), + BTBEntryAddrKind::CondNotTaken => Some(( + false, + #[hdl(sim)] + BTBEntryAddrKind::CondTaken(), + )), + }; + if let Some((taken, opposite_addr_kind)) = taken_and_opposite_addr_kind { + let index = Self::branch_predictor_index( + state, + BTBEntry::branch_pc( + &#[hdl(sim)] + BTBEntry { + start_pc, + rest: &btb_entry.1, + }, + ), + ); + if taken != BranchPredictionState::is_taken(&state.branch_predictor[index]) { + let btb_entry_index = &btb_entry.0; + let mut btb_entry = btb_entry.1.clone(); + btb_entry.addr_kind = opposite_addr_kind; + let StageOutput { outputs, cancel } = Self::stage_output_ty(config); + let retval = #[hdl(sim)] + StageOutput::<_, _, _> { + outputs: outputs.sim_value_default(), + cancel: #[hdl(sim)] + cancel.HdlSome( + #[hdl(sim)] + Cancel::<_> { + call_stack: start_call_stack, + start_pc, + new_btb_entry: #[hdl(sim)] + HdlSome(btb_entry), + btb_entry_index: #[hdl(sim)] + HdlSome(btb_entry_index), + train_branch_predictor: #[hdl(sim)] + HdlNone(), + branch_history: start_branch_history, + config, + }, + ), + }; + return retval; + } + train_branch_predictor_for_misprediction = #[hdl(sim)] + HdlSome( + #[hdl(sim)] + TrainBranchPredictor { + branch_predictor_index: index.cast_to_static::>(), + taken: !taken, // invert since this is for a misprediction + }, + ); + state.branch_history = ((&state.branch_history << 1) + | taken.cast_to_static::>()) + .cast_to_static(); + } + } + let output = #[hdl(sim)] + BrPredStageOutput::<_> { + start_branch_history, + 
train_branch_predictor_for_misprediction, + config, + }; + #[hdl(sim)] + StageOutput::<_, _, _> { + outputs: Self::stage_output_ty(config).outputs.new_full_sim([output]), + cancel: #[hdl(sim)] + (HdlOption[Cancel[config]]).HdlNone(), + } + } + + #[hdl] + fn cancel(state: &mut SimValue, cancel: &SimValue>>) { + #[hdl(sim)] + let Cancel::<_> { + call_stack: _, + start_pc: _, + new_btb_entry: _, + btb_entry_index: _, + train_branch_predictor, + branch_history, + config: _, + } = cancel; + #[hdl(sim)] + if let HdlSome(train_branch_predictor) = train_branch_predictor { + #[hdl(sim)] + let TrainBranchPredictor { + branch_predictor_index, + taken, + } = train_branch_predictor; + let branch_prediction_state = &mut state.branch_predictor[**branch_predictor_index]; + if **taken { + *branch_prediction_state = + BranchPredictionState::towards_taken(branch_prediction_state); + } else { + *branch_prediction_state = + BranchPredictionState::towards_not_taken(branch_prediction_state); + } + } + state.branch_history.clone_from(branch_history); + } +} + +#[hdl(no_static)] +struct FetchDecodeStageState> { + config: C, +} + +impl SimValueDefault for FetchDecodeStageState> { + #[hdl] + fn sim_value_default(self) -> SimValue { + #[hdl(sim)] + Self { + config: self.config, + } + } +} + +impl ResetSteps for FetchDecodeStageState> { + #[hdl] + fn reset_step(this: &mut SimValue, _step: usize) -> ResetStatus { + #[hdl(sim)] + let Self { config: _ } = this; + ResetStatus::Done + } +} + +#[hdl(no_static)] +struct FetchDecodeStageOutput> { + next_pc_stage_output: NextPcStageOutput, + decode_output: DecodeToPostDecodeInterfaceInner, +} + +impl Stage for FetchDecodeStageState> { + type Inputs = FetchDecodeStageOutput>; + type Output = FetchDecodeStageOutput>; + type MaxOutputCount = ConstUsize<1>; + + fn output_ty(config: PhantomConst) -> Self::Output { + FetchDecodeStageOutput[config] + } + + fn max_output_count( + _config: PhantomConst, + ) -> ::SizeType { + ConstUsize + } + + #[hdl] + fn run( 
+ state: &mut SimValue, + inputs: &SimValue, + ) -> SimValue>> { + #[hdl(sim)] + let Self { config } = state; + let config = config.ty(); + let StageOutput { outputs, cancel } = Self::stage_output_ty(config); + #[hdl(sim)] + StageOutput::<_, _, _> { + outputs: outputs.new_full_sim([inputs]), + cancel: #[hdl(sim)] + cancel.HdlNone(), + } + } + + #[hdl] + fn cancel(state: &mut SimValue, _cancel: &SimValue>>) { + #[hdl(sim)] + let Self { config: _ } = state; + } +} + +#[hdl(no_static)] +struct PostDecodeStageState> { + config: C, +} + +#[hdl(no_static)] +struct PostDecodeStageOutput> { + insn: WipDecodedInsn, + config: C, +} + +impl SimValueDefault for PostDecodeStageOutput> { + #[hdl] + fn sim_value_default(self) -> SimValue { + #[hdl(sim)] + Self { + insn: self.insn.sim_value_default(), + config: self.config, + } + } +} + +impl SimValueDefault for PostDecodeStageState> { + #[hdl] + fn sim_value_default(self) -> SimValue { + #[hdl(sim)] + Self { + config: self.config, + } + } +} + +impl ResetSteps for PostDecodeStageState> { + #[hdl] + fn reset_step(this: &mut SimValue, _step: usize) -> ResetStatus { + #[hdl(sim)] + let Self { config: _ } = this; + ResetStatus::Done + } +} + +impl Stage for PostDecodeStageState> { + type Inputs = ( + FetchDecodeStageOutput>, + BrPredStageOutput>, + ); + type Output = PostDecodeStageOutput>; + type MaxOutputCount = CpuConfigFetchWidth>; + + fn output_ty(config: PhantomConst) -> Self::Output { + PostDecodeStageOutput[config] + } + + fn max_output_count( + config: PhantomConst, + ) -> ::SizeType { + CpuConfigFetchWidth[config] + } + + #[hdl] + fn run( + state: &mut SimValue, + inputs: &SimValue, + ) -> SimValue>> { + #[hdl(sim)] + let Self { config } = state; + let config = config.ty(); + #[hdl(sim)] + let (fetch_decode_stage_output, br_pred_stage_output) = inputs; + #[hdl(sim)] + let FetchDecodeStageOutput::<_> { + next_pc_stage_output, + decode_output, + } = fetch_decode_stage_output; + #[hdl(sim)] + let NextPcStageOutput::<_> { + 
start_pc, + next_start_pc: predicted_next_start_pc, + btb_entry: predicted_btb_entry, + fetch_block_id, + start_call_stack, + config: _, + } = next_pc_stage_output; + #[hdl(sim)] + let DecodeToPostDecodeInterfaceInner::<_> { insns, config: _ } = decode_output; + assert_ne!( + **ArrayVec::len_sim(&insns), + 0, + "fetch/decode must always return at least one instruction \ + -- either the decoded instructions or a WipDecodedInsnKind::Interrupt", + ); + let insns = ArrayVec::elements_sim_ref(&insns); + let StageOutput { + outputs: outputs_ty, + cancel: cancel_ty, + } = Self::stage_output_ty(config); + assert_eq!(outputs_ty.capacity(), decode_output.insns.ty().capacity()); + let mut outputs = outputs_ty.sim_value_default(); + let mut add_output_insn = |insn: &SimValue, fallthrough_offset: &mut u8| { + ArrayVec::try_push_sim( + &mut outputs, + #[hdl(sim)] + PostDecodeStageOutput::<_> { insn, config }, + ) + .expect("known to be in bounds"); + *fallthrough_offset += insn.size_in_bytes.cast_to_static::>().as_int(); + }; + if let Some(target_pc) = WipDecodedInsnKind::interrupt_target_pc_sim(&insns[0].kind) { + add_output_insn(&insns[0], &mut 0); + let mut call_stack = start_call_stack.clone(); + CallStack::push(&mut call_stack, start_pc); + let retval = #[hdl(sim)] + StageOutput::<_, _, _> { + outputs, + cancel: #[hdl(sim)] + cancel_ty.HdlSome( + #[hdl(sim)] + Cancel::<_> { + call_stack, + start_pc: target_pc, + new_btb_entry: #[hdl(sim)] + HdlNone(), + btb_entry_index: #[hdl(sim)] + HdlNone(), + train_branch_predictor: #[hdl(sim)] + HdlNone(), + branch_history: &br_pred_stage_output.start_branch_history, + config, + }, + ), + }; + return retval; + } + let mut fallthrough_offset = 0u8; + let mut branch_offset = 0u8; + let mut after_call_offset = 0u8; + let mut btb_entry_fields = None; + for insn in insns { + #[hdl(sim)] + let WipDecodedInsn { + fetch_block_id: insn_fetch_block_id, + id: _, + pc, + size_in_bytes: _, + kind, + } = insn; + assert_eq!( + insn_fetch_block_id, 
fetch_block_id, + "fetch decode pipeline's output isn't in-sync with fetching_queue", + ); + let guess_branch_addr_kind = |fallback_taken| { + #[hdl(sim)] + if let HdlSome(entry) = predicted_btb_entry { + let addr_kind = &entry.1.addr_kind; + #[hdl(sim)] + match addr_kind { + BTBEntryAddrKind::Unconditional | BTBEntryAddrKind::Indirect => {} + BTBEntryAddrKind::CondTaken | BTBEntryAddrKind::CondNotTaken => { + return addr_kind.clone(); + } + } + } + if fallback_taken { + #[hdl(sim)] + BTBEntryAddrKind::CondTaken() + } else { + #[hdl(sim)] + BTBEntryAddrKind::CondNotTaken() + } + }; + let insn_kind; + let addr_kind; + let target_pc = #[hdl(sim)] + match kind { + WipDecodedInsnKind::NonBranch => { + add_output_insn(insn, &mut fallthrough_offset); + continue; + } + WipDecodedInsnKind::Branch(target_pc) => { + insn_kind = #[hdl(sim)] + BTBEntryInsnKind::Branch(); + addr_kind = #[hdl(sim)] + BTBEntryAddrKind::Unconditional(); + Some(target_pc.as_int()) + } + WipDecodedInsnKind::BranchCond(target_pc) => { + insn_kind = #[hdl(sim)] + BTBEntryInsnKind::Branch(); + // guess backwards branches are taken and forwards branches are not + addr_kind = guess_branch_addr_kind(target_pc.as_int() <= pc.as_int()); + Some(target_pc.as_int()) + } + WipDecodedInsnKind::IndirectBranch => { + insn_kind = #[hdl(sim)] + BTBEntryInsnKind::Branch(); + addr_kind = #[hdl(sim)] + BTBEntryAddrKind::Indirect(); + None + } + WipDecodedInsnKind::Call(target_pc) => { + insn_kind = #[hdl(sim)] + BTBEntryInsnKind::Call(); + addr_kind = #[hdl(sim)] + BTBEntryAddrKind::Unconditional(); + Some(target_pc.as_int()) + } + WipDecodedInsnKind::CallCond(target_pc) => { + insn_kind = #[hdl(sim)] + BTBEntryInsnKind::Call(); + // guess conditional calls are taken + addr_kind = guess_branch_addr_kind(true); + Some(target_pc.as_int()) + } + WipDecodedInsnKind::IndirectCall => { + insn_kind = #[hdl(sim)] + BTBEntryInsnKind::Call(); + addr_kind = #[hdl(sim)] + BTBEntryAddrKind::Indirect(); + None + } + 
WipDecodedInsnKind::Ret => { + insn_kind = #[hdl(sim)] + BTBEntryInsnKind::Ret(); + addr_kind = #[hdl(sim)] + BTBEntryAddrKind::Unconditional(); + None + } + WipDecodedInsnKind::RetCond => { + insn_kind = #[hdl(sim)] + BTBEntryInsnKind::Ret(); + // guess conditional returns are taken + addr_kind = guess_branch_addr_kind(true); + None + } + WipDecodedInsnKind::Interrupt(_) => { + // interrupt after other instructions, just truncate the fetch block before the interrupt + break; + } + WipDecodedInsnKind::Unknown => unreachable!(), + }; + + // all branches/calls/returns end up here + + if btb_entry_fields.is_some() { + // TODO: maybe implement handling multiple ctrl transfer insns in the same fetch block, + // for now we just truncate the fetch block right before the second ctrl transfer insn. + break; + } + branch_offset = fallthrough_offset; + add_output_insn(insn, &mut fallthrough_offset); + #[hdl(sim)] + match &insn_kind { + BTBEntryInsnKind::Call => after_call_offset = fallthrough_offset, + BTBEntryInsnKind::Branch | BTBEntryInsnKind::Ret | BTBEntryInsnKind::Unknown => {} + } + btb_entry_fields = Some((insn_kind, addr_kind, target_pc)); + } + let new_btb_entry = if let Some((insn_kind, addr_kind, target_pc)) = btb_entry_fields { + #[hdl(sim)] + HdlSome( + #[hdl(sim)] + BTBEntryWithoutStartPc { + target_pc: target_pc.unwrap_or_else(|| predicted_next_start_pc.as_int()), + fallthrough_offset, + branch_offset, + after_call_offset, + insn_kind, + addr_kind, + }, + ) + } else { + #[hdl(sim)] + HdlNone() + }; + let (btb_entry_index, predicted_btb_entry) = #[hdl(sim)] + match predicted_btb_entry { + HdlSome(predicted_btb_entry) => { + #[hdl(sim)] + let (btb_entry_index, predicted_btb_entry) = predicted_btb_entry; + ( + #[hdl(sim)] + HdlSome(btb_entry_index), + #[hdl(sim)] + HdlSome(predicted_btb_entry), + ) + } + HdlNone => ( + #[hdl(sim)] + HdlNone(), + #[hdl(sim)] + HdlNone(), + ), + }; + if *new_btb_entry.cmp_ne(predicted_btb_entry) { + #[hdl(sim)] + StageOutput::<_, 
_, _> { + outputs: outputs_ty.sim_value_default(), + cancel: #[hdl(sim)] + cancel_ty.HdlSome( + #[hdl(sim)] + Cancel::<_> { + call_stack: start_call_stack, + start_pc, + new_btb_entry, + btb_entry_index, + train_branch_predictor: #[hdl(sim)] + HdlNone(), + branch_history: &br_pred_stage_output.start_branch_history, + config, + }, + ), + } + } else { + #[hdl(sim)] + StageOutput::<_, _, _> { + outputs, + cancel: #[hdl(sim)] + cancel_ty.HdlNone(), + } + } + } + + #[hdl] + fn cancel(state: &mut SimValue, _cancel: &SimValue>>) { + #[hdl(sim)] + let Self { config: _ } = state; + } +} + +#[hdl(no_static)] +struct RenameDispatchExecuteStageState> { + config: C, +} + +#[hdl(no_static)] +struct RenameDispatchExecuteStageOutput> { + // TODO: add fields + config: C, +} + +#[hdl(no_static)] +struct RetireStageState> { + config: C, +} + +#[hdl(no_static)] +struct RetireStageOutput> { + // TODO: add fields + config: C, +} + #[hdl] enum BranchPredictionState { StronglyNotTaken, @@ -221,6 +1158,12 @@ impl SimValueDefault for SimOnly { } } +impl SimValueDefault for ArrayVec { + fn sim_value_default(self) -> SimValue { + self.new_sim(self.element().sim_value_default()) + } +} + impl SimValueDefault for HdlOption { fn sim_value_default(self) -> SimValue { self.HdlNone().to_sim_value_with_type(self) @@ -239,6 +1182,35 @@ impl SimValueDefault for UIntType { } } +impl SimValueDefault for WipDecodedInsnKind { + #[hdl] + fn sim_value_default(self) -> SimValue { + #[hdl(sim)] + WipDecodedInsnKind::NonBranch() + } +} + +impl SimValueDefault for WipDecodedInsn { + #[hdl] + fn sim_value_default(self) -> SimValue { + let Self { + fetch_block_id, + id, + pc, + size_in_bytes, + kind, + } = self; + #[hdl(sim)] + WipDecodedInsn { + fetch_block_id: fetch_block_id.sim_value_default(), + id: id.sim_value_default(), + pc: pc.sim_value_default(), + size_in_bytes: size_in_bytes.sim_value_default(), + kind: kind.sim_value_default(), + } + } +} + trait ResetSteps: Type { fn reset_step(this: &mut SimValue, 
step: usize) -> ResetStatus; } @@ -328,6 +1300,43 @@ enum BTBEntryInsnKind { Ret, } +// TODO: replace with #[hdl(cmp_eq)] when that's implemented for enums +impl HdlPartialEqImpl for BTBEntryInsnKind { + #[track_caller] + fn cmp_value_eq( + lhs: Self, + lhs_value: Cow<'_, Self::SimValue>, + rhs: Self, + rhs_value: Cow<'_, Self::SimValue>, + ) -> bool { + *Self::cmp_sim_value_eq( + Cow::Owned(SimValue::from_value(lhs, lhs_value.into_owned())), + Cow::Owned(SimValue::from_value(rhs, rhs_value.into_owned())), + ) + } + + #[track_caller] + fn cmp_sim_value_eq( + lhs: Cow<'_, SimValue>, + rhs: Cow<'_, SimValue>, + ) -> SimValue { + (SimValue::bits(&*lhs) == SimValue::bits(&*rhs)).to_sim_value() + } + + #[track_caller] + fn cmp_sim_value_ne( + lhs: Cow<'_, SimValue>, + rhs: Cow<'_, SimValue>, + ) -> SimValue { + (SimValue::bits(&*lhs) != SimValue::bits(&*rhs)).to_sim_value() + } + + #[track_caller] + fn cmp_expr_eq(lhs: Expr, rhs: Expr) -> Expr { + lhs.cast_to_bits().cmp_eq(rhs.cast_to_bits()) + } +} + impl BTBEntryInsnKind { #[hdl] fn try_from_decoded_insn_kind(kind: &SimValue) -> Option> { @@ -364,6 +1373,43 @@ enum BTBEntryAddrKind { CondNotTaken, } +// TODO: replace with #[hdl(cmp_eq)] when that's implemented for enums +impl HdlPartialEqImpl for BTBEntryAddrKind { + #[track_caller] + fn cmp_value_eq( + lhs: Self, + lhs_value: Cow<'_, Self::SimValue>, + rhs: Self, + rhs_value: Cow<'_, Self::SimValue>, + ) -> bool { + *Self::cmp_sim_value_eq( + Cow::Owned(SimValue::from_value(lhs, lhs_value.into_owned())), + Cow::Owned(SimValue::from_value(rhs, rhs_value.into_owned())), + ) + } + + #[track_caller] + fn cmp_sim_value_eq( + lhs: Cow<'_, SimValue>, + rhs: Cow<'_, SimValue>, + ) -> SimValue { + (SimValue::bits(&*lhs) == SimValue::bits(&*rhs)).to_sim_value() + } + + #[track_caller] + fn cmp_sim_value_ne( + lhs: Cow<'_, SimValue>, + rhs: Cow<'_, SimValue>, + ) -> SimValue { + (SimValue::bits(&*lhs) != SimValue::bits(&*rhs)).to_sim_value() + } + + #[track_caller] + fn 
cmp_expr_eq(lhs: Expr, rhs: Expr) -> Expr { + lhs.cast_to_bits().cmp_eq(rhs.cast_to_bits()) + } +} + impl BTBEntryAddrKind { #[hdl] fn taken(this: &SimValue) -> bool { @@ -410,28 +1456,52 @@ impl BTBEntryAddrKind { } } -#[hdl] -struct BTBEntry { - /// address of first instruction to run in this fetch block - start_pc: UInt<64>, +#[hdl(cmp_eq)] +struct BTBEntryWithoutStartPc { target_pc: UInt<64>, /// when branch is not taken, the next pc to fetch from is `start_pc + fallthrough_offset`. /// needed because there may be more than one branch in a fetch block fallthrough_offset: UInt<8>, + /// the pc to use for branch prediction is `start_pc + branch_offset` + branch_offset: UInt<8>, /// when a call is made, the return address is `start_pc + after_call_offset` after_call_offset: UInt<8>, insn_kind: BTBEntryInsnKind, addr_kind: BTBEntryAddrKind, } +#[hdl] +struct BTBEntry { + /// address of first instruction to run in this fetch block + start_pc: UInt<64>, + rest: BTBEntryWithoutStartPc, +} + impl BTBEntry { fn taken_pc(this: &SimValue) -> u64 { - this.target_pc.as_int() + this.rest.target_pc.as_int() } - fn not_taken_fetch_pc(this: &SimValue) -> u64 { + fn not_taken_start_pc(this: &SimValue) -> u64 { + Self::fallthrough_pc(this) + } + /// when branch is not taken, this returns the next pc to fetch from. 
+ /// needed because there may be more than one branch in a fetch block + fn fallthrough_pc(this: &SimValue) -> u64 { this.start_pc .as_int() - .wrapping_add(this.fallthrough_offset.as_int().into()) + .wrapping_add(this.rest.fallthrough_offset.as_int().into()) + } + /// the pc to use for branch prediction + fn branch_pc(this: &SimValue) -> u64 { + this.start_pc + .as_int() + .wrapping_add(this.rest.branch_offset.as_int().into()) + } + /// when a call is made, this gives the return address + fn after_call_pc(this: &SimValue) -> u64 { + this.start_pc + .as_int() + .wrapping_add(this.rest.after_call_offset.as_int().into()) } } @@ -489,11 +1559,15 @@ impl SimValueDefault for BranchTargetBuffer { #[hdl(sim)] BTBEntry { start_pc: !0u64, - target_pc: !0u64, - fallthrough_offset: !0u8, - after_call_offset: !0u8, - insn_kind: BTBEntryInsnKind.Call(), - addr_kind: BTBEntryAddrKind.CondNotTaken(), + rest: #[hdl(sim)] + BTBEntryWithoutStartPc { + target_pc: !0u64, + fallthrough_offset: !0u8, + branch_offset: !0u8, + after_call_offset: !0u8, + insn_kind: BTBEntryInsnKind.Call(), + addr_kind: BTBEntryAddrKind.CondNotTaken(), + }, }, ); Self::SIZE], next_index_to_replace_lfsr: LFSR31.sim_value_default(), @@ -1007,13 +2081,13 @@ impl> NextPcState { // for now we just truncate the fetch block right before the second ctrl transfer insn. 
break; } - btb_entry_fields = Some((insn_kind, addr_kind, target_pc)); - fallthrough_offset += size_in_bytes.cast_to_static::>().as_int(); #[hdl(sim)] - match insn_kind { + match &insn_kind { BTBEntryInsnKind::Call => after_call_offset = fallthrough_offset, BTBEntryInsnKind::Branch | BTBEntryInsnKind::Ret | BTBEntryInsnKind::Unknown => {} } + btb_entry_fields = Some((insn_kind, addr_kind, target_pc)); + fallthrough_offset += size_in_bytes.cast_to_static::>().as_int(); } let new_next_pc = if let Some((insn_kind, addr_kind, mut target_pc)) = btb_entry_fields { // add/update BTBEntry if it doesn't match @@ -1046,15 +2120,22 @@ impl> NextPcState { BranchTargetBuffer::next_index_to_replace(&mut this.branch_target_buffer) }); let new_next_pc = #[hdl(sim)] - match insn_kind { - BTBEntryInsnKind::Branch => {} + match &insn_kind { + BTBEntryInsnKind::Branch => { + todo!() + } BTBEntryInsnKind::Call => { - CallStack::push(&mut this.speculative_call_stack, todo!()); + CallStack::push( + &mut this.speculative_call_stack, + start_pc + u64::from(after_call_offset), + ); todo!() } BTBEntryInsnKind::Ret => { target_pc = CallStack::pop(&mut this.speculative_call_stack).or(target_pc); + target_pc.unwrap_or(0u64) } + BTBEntryInsnKind::Unknown => unreachable!(), }; let new_entry = #[hdl(sim)] BTBEntry { diff --git a/crates/cpu/src/next_pc/next_pc.mermaid b/crates/cpu/src/next_pc/next_pc.mermaid new file mode 100644 index 0000000..05ac31c --- /dev/null +++ b/crates/cpu/src/next_pc/next_pc.mermaid @@ -0,0 +1,25 @@ +stateDiagram-v2 + direction LR + + state "Next PC" as next_pc + [*] --> next_pc + + state "Fetch/Decode" as fetch_decode + next_pc --> fetch_decode + + state "Branch Predictor" as br_pred + next_pc --> br_pred + br_pred --> next_pc: cancel following + + state "Post-decode" as post_decode + fetch_decode --> post_decode + br_pred --> post_decode + post_decode --> next_pc: cancel following + + state "Rename\nDispatch\nExecute" as execute + post_decode --> execute + + state 
"Retire" as retire + execute --> retire + retire --> [*] + retire --> next_pc: cancel following \ No newline at end of file diff --git a/crates/cpu/src/util/array_vec.rs b/crates/cpu/src/util/array_vec.rs index 16be553..b28b029 100644 --- a/crates/cpu/src/util/array_vec.rs +++ b/crates/cpu/src/util/array_vec.rs @@ -34,6 +34,18 @@ impl ArrayVec { len: 0u8.cast_to(self.len), } } + #[hdl] + pub fn new_full_sim( + self, + elements: impl ToSimValueWithType>, + ) -> SimValue { + let elements = elements.to_sim_value_with_type(self.elements); + #[hdl(sim)] + Self { + elements, + len: self.elements.len().to_sim_value_with_type(self.len), + } + } pub fn element(self) -> T { self.elements.element() }