diff --git a/Cargo.lock b/Cargo.lock index 09345a1..67bc26e 100644 --- a/Cargo.lock +++ b/Cargo.lock @@ -207,6 +207,8 @@ name = "cpu" version = "0.1.0" dependencies = [ "fayalite", + "name_mangling_serde", + "serde", ] [[package]] diff --git a/Cargo.toml b/Cargo.toml index 94355a7..38cade6 100644 --- a/Cargo.toml +++ b/Cargo.toml @@ -14,6 +14,7 @@ categories = [] rust-version = "1.82.0" [workspace.dependencies] +name_mangling_serde = { version = "=0.1.0", path = "crates/name_mangling_serde" } fayalite = { git = "https://git.libre-chip.org/libre-chip/fayalite.git", version = "0.3.0", branch = "master" } serde = { version = "1.0.202", features = ["derive"] } serde_json = { version = "1.0.117", features = ["preserve_order"] } diff --git a/crates/cpu/Cargo.toml b/crates/cpu/Cargo.toml index 16ec0b9..9ff26c3 100644 --- a/crates/cpu/Cargo.toml +++ b/crates/cpu/Cargo.toml @@ -16,3 +16,8 @@ version.workspace = true [dependencies] fayalite.workspace = true +serde.workspace = true +name_mangling_serde.workspace = true + +[lints.rust] +unexpected_cfgs = { level = "warn", check-cfg = ['cfg(todo)'] } diff --git a/crates/cpu/src/config.rs b/crates/cpu/src/config.rs index 5be163c..6a0d27a 100644 --- a/crates/cpu/src/config.rs +++ b/crates/cpu/src/config.rs @@ -1,16 +1,17 @@ // SPDX-License-Identifier: LGPL-3.0-or-later // See Notices.txt for copyright information use crate::{ - instruction::{MOpTrait, PRegNum, RenamedMOp, UnitNum, UnitOutRegNum, CONST_ZERO_UNIT_NUM}, - unit::{ - unit_base::{UnitForwardingInfo, UnitToRegAlloc}, - UnitCancelInput, UnitKind, UnitOutputWrite, - }, + instruction::{PRegNum, CONST_ZERO_UNIT_NUM}, + unit::UnitKind, }; -use fayalite::prelude::*; +use fayalite::{ + intern::{Intern, Interned}, + prelude::*, +}; +use serde::{Deserialize, Serialize}; use std::num::NonZeroUsize; -#[derive(Clone, Eq, PartialEq, Hash, Debug)] +#[derive(Copy, Clone, Eq, PartialEq, Hash, Debug, Serialize, Deserialize)] #[non_exhaustive] pub struct UnitConfig { pub kind: UnitKind, @@ -27,15 +28,14 @@ impl UnitConfig { } } -#[derive(Clone, Eq, PartialEq, Hash, Debug)] +#[derive(Copy, Clone, Eq, PartialEq, Hash, Debug, Serialize, Deserialize)] #[non_exhaustive] pub struct CpuConfig { - pub units: Vec, + pub units: Interned<[UnitConfig]>, pub out_reg_num_width: usize, pub fetch_width: NonZeroUsize, /// default value for [`UnitConfig::max_in_flight`] pub default_unit_max_in_flight: NonZeroUsize, - pub rob_size: NonZeroUsize, } impl CpuConfig { @@ -52,13 +52,12 @@ impl CpuConfig { }; v }; - pub fn new(units: Vec, rob_size: NonZeroUsize) -> Self { + pub fn new(units: Interned<[UnitConfig]>) -> Self { Self { units, out_reg_num_width: Self::DEFAULT_OUT_REG_NUM_WIDTH, fetch_width: Self::DEFAULT_FETCH_WIDTH, default_unit_max_in_flight: Self::DEFAULT_UNIT_MAX_IN_FLIGHT, - rob_size, } } pub fn non_const_unit_nums(&self) -> std::ops::Range { @@ -67,53 +66,83 @@ impl CpuConfig { pub fn unit_num_width(&self) -> usize { UInt::range(CONST_ZERO_UNIT_NUM..self.non_const_unit_nums().end).width() } - pub fn unit_num(&self) -> UnitNum { - UnitNum[self.unit_num_width()] - } - pub fn unit_out_reg_num(&self) -> UnitOutRegNum { - UnitOutRegNum[self.out_reg_num_width] - } - pub fn p_reg_num(&self) -> PRegNum { - PRegNum[self.unit_num_width()][self.out_reg_num_width] - } - pub fn p_reg_num_width(&self) -> usize { - self.unit_num_width() + self.out_reg_num_width - } - pub fn renamed_mop_in_unit(&self) -> RenamedMOp, DynSize> { - RenamedMOp[self.unit_out_reg_num()][self.p_reg_num_width()] - } - pub fn unit_output_write(&self) -> UnitOutputWrite { - UnitOutputWrite[self.out_reg_num_width] - } - pub fn unit_output_writes(&self) -> Array>> { - Array[HdlOption[self.unit_output_write()]][self.non_const_unit_nums().len()] - } - pub fn unit_cancel_input(&self) -> UnitCancelInput { - UnitCancelInput[self.out_reg_num_width] - } - pub fn unit_forwarding_info(&self) -> UnitForwardingInfo { - UnitForwardingInfo[self.unit_num_width()][self.out_reg_num_width] - [self.non_const_unit_nums().len()] - } pub fn unit_max_in_flight(&self, unit_index: usize) -> NonZeroUsize { self.units[unit_index] .max_in_flight .unwrap_or(self.default_unit_max_in_flight) } - pub fn unit_to_reg_alloc< - MOp: Type + MOpTrait, SrcRegWidth = DynSize>, - ExtraOut: Type, - >( - &self, - mop_ty: MOp, - extra_out_ty: ExtraOut, - ) -> UnitToRegAlloc { - assert_eq!( - mop_ty.dest_reg_ty(), - self.unit_out_reg_num(), - "inconsistent types", - ); - UnitToRegAlloc[mop_ty][extra_out_ty][self.unit_num_width()][self.out_reg_num_width] - [self.non_const_unit_nums().len()] + pub fn retire_queue_index_width(&self) -> usize { + let max_in_flight: usize = (0..self.units.len()) + .map(|unit_index| self.unit_max_in_flight(unit_index).get()) + .sum(); + 2 + max_in_flight.next_power_of_two().ilog2() as usize } } + +mod sealed { + pub trait Sealed {} +} + +impl sealed::Sealed for PhantomConst {} + +pub trait CpuConfigType: Type + ToExpr + sealed::Sealed { + fn get(self) -> Interned; +} + +impl CpuConfigType for PhantomConst { + fn get(self) -> Interned { + self.get() + } +} + +pub trait Identity { + type SelfType: ?Sized; + type ArgType: ?Sized; +} + +impl Identity for T { + type SelfType = T; + type ArgType = Arg; +} + +macro_rules! impl_cpu_config_accessors { + ( + $( + #[without_generics = $without_generics:ident] + $vis:vis type $ident:ident<$T:ident> = |$arg:ident| $expr:expr; + )* + ) => { + $( + #[allow(non_camel_case_types)] + $vis struct $without_generics; + + #[allow(non_upper_case_globals)] + $vis const $ident: $without_generics = $without_generics; + + $vis type $ident<$T> = >::SelfType; + + impl<$T: CpuConfigType> std::ops::Index<$T> for $without_generics { + type Output = usize; + + fn index(&self, $arg: $T) -> &Self::Output { + Interned::into_inner(Intern::intern_sized($expr)) + } + } + )* + }; +} + +impl_cpu_config_accessors! { + #[without_generics = __UnitNumWidth_WithoutGenerics] + pub type UnitNumWidth = |arg| arg.get().unit_num_width(); + #[without_generics = __UnitOutRegNumWidth_WithoutGenerics] + pub type UnitOutRegNumWidth = |arg| arg.get().out_reg_num_width; + #[without_generics = __PRegNumWidth_WithoutGenerics] + pub type PRegNumWidth = |arg| PRegNum[arg].canonical().bit_width(); + #[without_generics = __RetireQueueIndexWidth_WithoutGenerics] + pub type RetireQueueIndexWidth = |arg| arg.get().retire_queue_index_width(); + #[without_generics = __UnitCount_WithoutGenerics] + pub type UnitCount = |arg| arg.get().non_const_unit_nums().len(); + #[without_generics = __FetchWidth_WithoutGenerics] + pub type FetchWidth = |arg| arg.get().fetch_width.get(); +} diff --git a/crates/cpu/src/instruction.rs b/crates/cpu/src/instruction.rs index 80dd9d5..0d85ff9 100644 --- a/crates/cpu/src/instruction.rs +++ b/crates/cpu/src/instruction.rs @@ -1,6 +1,10 @@ // SPDX-License-Identifier: LGPL-3.0-or-later // See Notices.txt for copyright information -use crate::{unit::UnitMOp, util::range_u32_len}; +use crate::{ + config::{CpuConfigType, UnitNumWidth, UnitOutRegNumWidth}, + unit::UnitMOp, + util::range_u32_len, +}; use fayalite::{ expr::ops::{ArrayLiteral, ExprPartialEq}, intern::Interned, @@ -805,19 +809,21 @@ common_mop_struct! { } } -#[hdl(cmp_eq)] +#[hdl(cmp_eq, no_static)] /// there may be more than one unit of a given kind, so UnitNum is not the same as UnitKind. /// zero is used for built-in constants, such as the zero register -pub struct UnitNum { - pub adj_value: UIntType, +pub struct UnitNum { + pub adj_value: UIntType>, + pub config: C, } -impl UnitNum { +impl UnitNum { #[hdl] pub fn const_zero(self) -> Expr { #[hdl] UnitNum { adj_value: CONST_ZERO_UNIT_NUM.cast_to(self.adj_value), + config: self.config, } } #[hdl] @@ -825,6 +831,7 @@ impl UnitNum { #[hdl] UnitNum { adj_value: (index + 1).cast_to(self.adj_value), + config: self.config, } } pub fn is_index(expr: impl ToExpr, index: usize) -> Expr { @@ -835,7 +842,7 @@ impl UnitNum { .cmp_eq(expr.adj_value) } #[hdl] - pub fn as_index(expr: impl ToExpr) -> Expr>> { + pub fn as_index(expr: impl ToExpr) -> Expr>> { let expr = expr.to_expr(); #[hdl] let unit_index = wire(HdlOption[Expr::ty(expr).adj_value]); @@ -853,19 +860,20 @@ impl UnitNum { pub const CONST_ZERO_UNIT_NUM: usize = 0; -#[hdl(cmp_eq)] -pub struct UnitOutRegNum { - pub value: UIntType, +#[hdl(cmp_eq, no_static)] +pub struct UnitOutRegNum { + pub value: UIntType>, + pub config: C, } -#[hdl(cmp_eq)] +#[hdl(cmp_eq, no_static)] /// Physical Register Number -- registers in the CPU's backend -pub struct PRegNum { - pub unit_num: UnitNum, - pub unit_out_reg: UnitOutRegNum, +pub struct PRegNum { + pub unit_num: UnitNum, + pub unit_out_reg: UnitOutRegNum, } -impl PRegNum { +impl PRegNum { #[hdl] pub fn const_zero(self) -> Expr { #[hdl] @@ -874,6 +882,7 @@ impl PRegNum = 0xFE..0x100; - /// registers handled by a special small rename table (for flags and stuff, since it has more read/write ports) - pub const SPECIAL_REG_NUMS: Range = Self::FLAG_REG_NUMS; - /// registers handled by the large rename table for normal registers (has less read/write ports) - pub const NORMAL_REG_NUMS: Range = - Self::CONST_ZERO_REG_NUM + 1..Self::SPECIAL_REG_NUMS.start; + /// registers that aren't constants + pub const NON_CONST_REG_NUMS: Range = + Self::CONST_ZERO_REG_NUM + 1..Self::FLAG_REG_NUMS.end; } #[hdl(cmp_eq)] @@ -929,29 +936,6 @@ pub struct MOpDestReg { pub flag_regs: Array, { range_u32_len(&MOpRegNum::FLAG_REG_NUMS) }>, } -#[derive(Copy, Clone, Eq, PartialEq, Ord, PartialOrd, Hash, Debug)] -pub enum RenameTableName { - /// the large rename table for normal registers (has less read/write ports) - Normal, - /// a special small rename table (for flags and stuff, since it has more read/write ports) - Special, -} - -impl RenameTableName { - pub const fn reg_range(self) -> std::ops::Range { - match self { - Self::Normal => MOpRegNum::NORMAL_REG_NUMS, - Self::Special => MOpRegNum::SPECIAL_REG_NUMS, - } - } - pub const fn as_str(self) -> &'static str { - match self { - Self::Normal => "rename_table_normal", - Self::Special => "rename_table_special", - } - } -} - #[derive(Copy, Clone, Eq, PartialEq, Hash, Debug)] pub enum MOpDestRegKind { NormalReg { @@ -989,16 +973,13 @@ impl fmt::Display for MOpDestRegName { } impl MOpDestRegKind { - pub const fn reg_range(self) -> std::ops::Range { + pub const fn reg_num_range(self) -> std::ops::Range { match self { - Self::NormalReg { .. } => MOpRegNum::NORMAL_REG_NUMS, - Self::FlagReg { .. } => MOpRegNum::FLAG_REG_NUMS, - } - } - pub const fn rename_table_names(self) -> &'static [RenameTableName] { - match self { - Self::NormalReg { .. } => &[RenameTableName::Normal, RenameTableName::Special], - Self::FlagReg { .. } => &[RenameTableName::Special], + Self::NormalReg { dest_reg_index: _ } => MOpRegNum::NON_CONST_REG_NUMS, + Self::FlagReg { + reg_num, + flag_reg_index: _, + } => reg_num..reg_num + 1, } } pub fn fixed_reg_num(self) -> Option { @@ -1091,5 +1072,5 @@ pub type MOp = UnitMOp< >; #[hdl] -pub type RenamedMOp = - UnitMOp>; +pub type RenamedMOp = + UnitMOp<(), SrcRegWidth, L2RegisterFileMOp<(), SrcRegWidth>>; diff --git a/crates/cpu/src/instruction_rename.rs b/crates/cpu/src/instruction_rename.rs new file mode 100644 index 0000000..8acca1b --- /dev/null +++ b/crates/cpu/src/instruction_rename.rs @@ -0,0 +1,261 @@ +// SPDX-License-Identifier: LGPL-3.0-or-later +// See Notices.txt for copyright information + +use crate::{ + config::{CpuConfig, CpuConfigType, FetchWidth, PRegNumWidth}, + instruction::{MOp, MOpDestReg, MOpRegNum, MOpTrait, MoveRegMOp, PRegNum, RenamedMOp}, + rename_table::{rename_table, RenameTablePortConfig}, + unit::{RenamedInsnData, RetireQueueIndex, UnitMOp}, + util::array_vec::{ArrayVec, Length, ReadyValidArray}, +}; +use fayalite::{ + prelude::*, + util::{prefix_sum::PrefixSumAlgorithm, ready_valid::ReadyValid}, +}; + +#[hdl(no_static)] +pub struct InstructionRenameInputInsn { + pub mop: MOp, + pub pc: UInt<64>, + pub renamed_dest: PRegNum, +} + +#[hdl(no_static)] +struct InsnsInPrefixSummary { + all_ready: Bool, + ready_count: Length>, + retire_queue_used: Length>, + config: C, +} + +type C = PhantomConst; + +#[hdl] +pub type InstructionRenameInsnsOut = ArrayType< + ReadyValid>, PRegNum>>, + FetchWidth, +>; + +#[hdl_module] +pub fn instruction_rename(config: PhantomConst) { + #[hdl] + let cd: ClockDomain = m.input(); + #[hdl] + let insns_in: ReadyValidArray, FetchWidth> = + m.input(ReadyValidArray[InstructionRenameInputInsn[config]][FetchWidth[config]]); + #[hdl] + let start_retire_queue_index: RetireQueueIndex = m.input(RetireQueueIndex[config]); + #[hdl] + let end_retire_queue_index: RetireQueueIndex = m.output(RetireQueueIndex[config]); + #[hdl] + let insns_out: InstructionRenameInsnsOut = m.output(InstructionRenameInsnsOut[config]); + + // TODO: handle resetting table after cancelling instructions + + #[hdl] + let insns_ready_or_move = wire(ArrayType[Bool][FetchWidth[config]]); + + for (insn_ready_or_move, insn_out) in insns_ready_or_move.into_iter().zip(insns_out) { + connect(insn_ready_or_move, insn_out.ready); + } + + ArrayVec::for_each(insns_in.data, |fetch_index, input_insn| { + #[hdl] + match input_insn.mop { + UnitMOp::<_, _, _>::TransformedMove(_) => { + connect(insns_ready_or_move[fetch_index], true); + } + UnitMOp::<_, _, _>::AluBranch(_) | UnitMOp::<_, _, _>::LoadStore(_) => {} + } + }); + + let insns_in_prefix_summary_ty = InsnsInPrefixSummary[config]; + #[hdl] + let insns_in_prefix_summaries = wire(ArrayType[insns_in_prefix_summary_ty][FetchWidth[config]]); + let insns_in_prefix_summaries_vec = PrefixSumAlgorithm::WorkEfficient.run( + (0..FetchWidth[config]).map(|fetch_index| { + #[hdl] + let insns_in_prefix_summary_in = wire(insns_in_prefix_summary_ty); + #[hdl] + let InsnsInPrefixSummary::<_> { + all_ready, + ready_count, + retire_queue_used, + config: _, + } = insns_in_prefix_summary_in; + connect(all_ready, insns_out[fetch_index].ready); + connect( + ready_count, + Expr::ty(ready_count).cast_from_uint_unchecked(all_ready.cast_to(UInt[1])), + ); + connect(retire_queue_used, Expr::ty(retire_queue_used).zero()); + #[hdl] + if let HdlSome(input_insn) = ArrayVec::get(insns_in.data, fetch_index) { + connect(retire_queue_used, ready_count); + #[hdl] + match input_insn.mop { + UnitMOp::<_, _, _>::TransformedMove(_) => { + connect(all_ready, true); + } + UnitMOp::<_, _, _>::AluBranch(_) | UnitMOp::<_, _, _>::LoadStore(_) => {} + } + } + insns_in_prefix_summary_in + }), + |l, r| { + #[hdl] + let insns_in_prefix_summary_merge = wire(insns_in_prefix_summary_ty); + #[hdl] + let InsnsInPrefixSummary::<_> { + all_ready, + ready_count, + retire_queue_used, + config: _, + } = insns_in_prefix_summary_merge; + connect(all_ready, l.all_ready & r.all_ready); + #[hdl] + if l.all_ready { + connect( + ready_count, + Expr::ty(ready_count).cast_from_uint_unchecked( + Length::as_uint(l.ready_count) + Length::as_uint(r.ready_count), + ), + ); + connect( + retire_queue_used, + Expr::ty(retire_queue_used).cast_from_uint_unchecked( + Length::as_uint(l.retire_queue_used) + Length::as_uint(r.retire_queue_used), + ), + ); + } else { + connect(ready_count, l.ready_count); + connect(retire_queue_used, l.retire_queue_used); + } + insns_in_prefix_summary_merge + }, + ); + for (l, r) in insns_in_prefix_summaries + .into_iter() + .zip(insns_in_prefix_summaries_vec) + { + connect(l, r); + } + connect( + insns_in.ready, + insns_in_prefix_summaries[FetchWidth[config] - 1].ready_count, + ); + + #[hdl] + let retire_queue_indexes = wire(Array[RetireQueueIndex[config]][FetchWidth[config] + 1]); + connect(retire_queue_indexes[0], start_retire_queue_index); + connect( + end_retire_queue_index, + retire_queue_indexes[FetchWidth[config]], + ); + for (retire_queue_index, insns_in_prefix_summary) in retire_queue_indexes + .into_iter() + .skip(1) + .zip(insns_in_prefix_summaries) + { + connect_any( + retire_queue_index.index, + start_retire_queue_index.index + + Length::as_uint(insns_in_prefix_summary.retire_queue_used), + ); + } + + let mut port_configs = Vec::new(); + let mut src_reg_count = 0; + MOpTrait::for_each_src_reg(MOp.uninit(), &mut |_, src_index| { + src_reg_count = src_reg_count.max(src_index + 1); + }); + for _ in 0..FetchWidth[config] { + for _ in 0..src_reg_count { + port_configs.push(RenameTablePortConfig::Read { + addr_range: MOpRegNum::NON_CONST_REG_NUMS, + }); + } + for dest_reg_kind in MOpDestReg::REG_KINDS { + port_configs.push(RenameTablePortConfig::Write { + addr_range: dest_reg_kind.reg_num_range(), + }); + } + } + + #[hdl] + let rename_table = instance(rename_table(config, &port_configs)); + + connect(rename_table.cd, cd); + + for read_port in rename_table.read_ports { + connect_any(read_port.addr, 0_hdl_u0); + } + for write_port in rename_table.write_ports { + connect_any(write_port.addr, 0_hdl_u0); + connect_any(write_port.data, PRegNum[config].const_zero()); + } + + ArrayVec::for_each( + ReadyValidArray::firing_data(insns_in), + |fetch_index, input_insn| { + let read_port_index = fetch_index * src_reg_count; + let write_port_index = fetch_index * MOpDestReg::REG_COUNT; + #[hdl] + let InstructionRenameInputInsn::<_> { + mop, + pc, + renamed_dest, + } = input_insn; + let insn_out = + MOpTrait::map_regs(mop, (), PRegNumWidth[config], &mut |src_reg, src_index| { + connect( + rename_table.read_ports[read_port_index + src_index].addr, + src_reg.cast_bits_to(MOpRegNum), + ); + rename_table.read_ports[read_port_index + src_index] + .data + .cast_to_bits() + }); + for (i, dest_reg) in MOpDestReg::regs(MOpTrait::dest_reg(mop)) + .into_iter() + .enumerate() + { + connect( + rename_table.write_ports[write_port_index + i].addr, + dest_reg, + ); + connect( + rename_table.write_ports[write_port_index + i].data, + renamed_dest, + ); + } + let insn_out = UnitMOp::try_with_transformed_move_op( + insn_out, + RenamedMOp[PRegNumWidth[config]].TransformedMove, + |insn_out: Expr>, move_reg: Expr>| { + for i in 0..MOpDestReg::REG_COUNT { + // execute move by using same PRegNum as src[0] for dest + connect( + rename_table.write_ports[write_port_index + i].data, + move_reg.common.src[0].cast_bits_to(PRegNum[config]), + ); + } + // move already executed, so remove it + connect(insn_out, Expr::ty(insn_out).HdlNone()); + }, + ); + connect( + insns_out[fetch_index].data, + HdlOption::map(insn_out, |insn_out| { + #[hdl] + RenamedInsnData::<_, _, _> { + retire_queue_index: retire_queue_indexes[fetch_index], + pc, + dest: renamed_dest, + mop: insn_out, + } + }), + ); + }, + ); +} diff --git a/crates/cpu/src/lib.rs b/crates/cpu/src/lib.rs index bae3720..f561b5d 100644 --- a/crates/cpu/src/lib.rs +++ b/crates/cpu/src/lib.rs @@ -2,7 +2,10 @@ // See Notices.txt for copyright information pub mod config; pub mod instruction; +pub mod instruction_rename; pub mod reg_alloc; pub mod register; +pub mod rename_table; +pub mod retire_queue; pub mod unit; pub mod util; diff --git a/crates/cpu/src/reg_alloc.rs b/crates/cpu/src/reg_alloc.rs index 6e7bc5d..acf796d 100644 --- a/crates/cpu/src/reg_alloc.rs +++ b/crates/cpu/src/reg_alloc.rs @@ -3,17 +3,17 @@ use crate::{ config::CpuConfig, instruction::{ - MOp, MOpDestReg, MOpRegNum, MOpTrait, MoveRegMOp, PRegNum, RenameTableName, UnitOutRegNum, + MOp, MOpDestReg, MOpRegNum, MOpTrait, MoveRegMOp, PRegNum, UnitOutRegNum, COMMON_MOP_SRC_LEN, }, unit::{ - unit_base::{UnitForwardingInfo, UnitInput}, GlobalState, TrapData, UnitMOp, UnitOutput, UnitOutputWrite, UnitResult, UnitResultCompleted, UnitTrait, }, - util::tree_reduce::tree_reduce_with_state, + util::array_vec::ReadyValidArray, }; use fayalite::{ + int::BoolOrIntType, memory::{splat_mask, WriteStruct}, module::{instance_with_loc, memory_with_loc, wire_with_loc}, prelude::*, @@ -44,150 +44,12 @@ pub enum FetchDecodeSpecialOp { #[hdl] pub struct FetchDecodeInterface { - pub decoded_insns: ArrayType, FetchWidth>, + pub decoded_insns: ReadyValidArray, #[hdl(flip)] pub fetch_decode_special_op: ReadyValid, } -#[hdl] -struct ROBRenamedInsn { - mop_dest: MOpDestReg, - p_dest: PRegNum, -} - -#[hdl] -struct ROBEntry { - renamed_insn: ROBRenamedInsn, - dest_written: Bool, -} - -#[hdl_module] -fn rob(config: &CpuConfig) { - #[hdl] - let cd: ClockDomain = m.input(); - #[hdl] - let renamed_insns_in: Array>> = m.input( - Array[ReadyValid[ROBRenamedInsn[config.unit_num_width()][config.out_reg_num_width]]] - [config.fetch_width.get()], - ); - #[hdl] - let unit_forwarding_info: UnitForwardingInfo = - m.input(config.unit_forwarding_info()); - - let rob_entry_ty = ROBEntry[config.unit_num_width()][config.out_reg_num_width]; - #[hdl] - let rob = reg_builder() - .clock_domain(cd) - .no_reset(Array[rob_entry_ty][config.rob_size.get()]); - #[hdl] - let rob_valid_start = reg_builder() - .clock_domain(cd) - .reset(UInt::range(0..config.rob_size.get()).zero()); - #[hdl] - let rob_valid_end = reg_builder() - .clock_domain(cd) - .reset(UInt::range(0..config.rob_size.get()).zero()); - #[hdl] - let free_space = wire(UInt::range_inclusive(0..=config.rob_size.get())); - #[hdl] - if rob_valid_end.cmp_lt(rob_valid_start) { - // rob_valid_end wrapped around but start didn't - connect_any( - free_space, - rob_valid_end + config.rob_size.get() - rob_valid_start, - ); - } else { - connect_any(free_space, rob_valid_end - rob_valid_start); - } - - struct IndexAndRange { - index: Expr, - range: std::ops::Range, - } - - let mut next_write_index = IndexAndRange { - index: rob_valid_end, - range: 0..config.rob_size.get(), - }; - for fetch_index in 0..config.fetch_width.get() { - let write_index = next_write_index; - let next_write_index_range = write_index.range.start..write_index.range.end + 1; - next_write_index = IndexAndRange { - index: wire_with_loc( - &format!("next_write_index_{fetch_index}"), - SourceLocation::caller(), - UInt::range(next_write_index_range.clone()), - ), - range: next_write_index_range, - }; - connect( - renamed_insns_in[fetch_index].ready, - fetch_index.cmp_lt(free_space), - ); - #[hdl] - if let HdlSome(renamed_insn) = ReadyValid::firing_data(renamed_insns_in[fetch_index]) { - for i in write_index.range.clone() { - #[hdl] - if write_index.index.cmp_eq(i) { - connect( - rob[i % config.rob_size.get()], - #[hdl] - ROBEntry { - renamed_insn, - dest_written: false, - }, - ); - } - } - } - // TODO: optimize write_index chain better - connect_any( - next_write_index.index, - write_index.index - + ReadyValid::firing(renamed_insns_in[fetch_index]).cast_to_static::>(), - ); - } - assert!( - config.rob_size >= config.fetch_width, - "rob_size ({}) is too small for fetch_width = {} -- next_write_index would overflow", - config.rob_size, - config.fetch_width, - ); - #[hdl] - if next_write_index.index.cmp_lt(config.rob_size.get()) { - connect_any(rob_valid_end, next_write_index.index); - } else { - connect_any( - rob_valid_end, - next_write_index.index - config.rob_size.get(), - ); - } - - // TODO: optimize better, O(rob_size * unit_count) is too big here - for rob_index in 0..config.rob_size.get() { - for unit_index in 0..config.non_const_unit_nums().len() { - #[hdl] - if let HdlSome(unit_output_write) = unit_forwarding_info.unit_output_writes[unit_index] - { - #[hdl] - let UnitOutputWrite::<_> { - which: unit_out_reg, - value: _, - } = unit_output_write; - let p_reg_num = #[hdl] - PRegNum::<_, _> { - unit_num: config.unit_num().from_index(unit_index), - unit_out_reg, - }; - #[hdl] - if rob[rob_index].renamed_insn.p_dest.cmp_eq(p_reg_num) { - connect(rob[rob_index].dest_written, true); - } - } - } - } -} - +#[cfg(todo)] #[hdl_module] /// combination register allocator, register renaming, unit selection, and retire handling pub fn reg_alloc(config: &CpuConfig) { @@ -205,10 +67,6 @@ pub fn reg_alloc(config: &CpuConfig) { ); // TODO: finish - #[hdl] - let rob = instance(rob(config)); - connect(rob.cd, cd); - let mut rename_table_mems = BTreeMap::>::new(); for reg_kind in MOpDestReg::REG_KINDS { @@ -238,11 +96,6 @@ pub fn reg_alloc(config: &CpuConfig) { #[hdl] let renamed_mops_out_reg = wire(Array[HdlOption[config.p_reg_num()]][config.fetch_width.get()]); for fetch_index in 0..config.fetch_width.get() { - // TODO: finish - connect( - rob.renamed_insns_in[fetch_index].data, - Expr::ty(rob).renamed_insns_in.element().data.HdlNone(), - ); // TODO: finish connect( fetch_decode_interface.decoded_insns[fetch_index].ready, @@ -483,7 +336,6 @@ pub fn reg_alloc(config: &CpuConfig) { ); #[hdl] let unit_forwarding_info = wire(config.unit_forwarding_info()); - connect(rob.unit_forwarding_info, unit_forwarding_info); for (unit_index, unit_config) in config.units.iter().enumerate() { let dyn_unit = unit_config.kind.unit(config, unit_index); let unit = instance_with_loc( diff --git a/crates/cpu/src/reg_alloc/unit_free_regs_tracker.rs b/crates/cpu/src/reg_alloc/unit_free_regs_tracker.rs index d19cf2a..133049a 100644 --- a/crates/cpu/src/reg_alloc/unit_free_regs_tracker.rs +++ b/crates/cpu/src/reg_alloc/unit_free_regs_tracker.rs @@ -1,7 +1,10 @@ // SPDX-License-Identifier: LGPL-3.0-or-later // See Notices.txt for copyright information -use crate::util::tree_reduce::tree_reduce; -use fayalite::{module::wire_with_loc, prelude::*, util::ready_valid::ReadyValid}; +use fayalite::{ + module::wire_with_loc, + prelude::*, + util::{prefix_sum::reduce, ready_valid::ReadyValid}, +}; use std::{num::NonZeroUsize, ops::Range}; #[hdl_module] @@ -44,7 +47,7 @@ pub fn unit_free_regs_tracker( count, count_overflowed, alloc_nums, - }) = tree_reduce( + }) = reduce( (0..reg_count).map(|index| Summary { range: index..index + 1, count: (!allocated_reg[index]) diff --git a/crates/cpu/src/rename_table.rs b/crates/cpu/src/rename_table.rs new file mode 100644 index 0000000..a38e4aa --- /dev/null +++ b/crates/cpu/src/rename_table.rs @@ -0,0 +1,187 @@ +// SPDX-License-Identifier: LGPL-3.0-or-later +// See Notices.txt for copyright information + +use crate::{ + config::{CpuConfig, CpuConfigType}, + instruction::{MOpRegNum, PRegNum}, + util::range_intersection, +}; +use fayalite::{ + memory::{splat_mask, ReadStruct, WriteStruct}, + module::memory_with_loc, + prelude::*, +}; +use std::{mem, ops::Range}; + +#[hdl(no_static)] +pub struct RenameTableReadPort { + pub addr: MOpRegNum, + #[hdl(flip)] + pub data: PRegNum, +} + +#[hdl(no_static)] +pub struct RenameTableWritePort { + pub addr: MOpRegNum, + pub data: PRegNum, +} + +#[derive(Clone, Debug)] +pub enum RenameTablePortConfig { + Read { addr_range: Range }, + Write { addr_range: Range }, +} + +type C = PhantomConst; + +/// register rename table. +/// all read/write operations are done in the order of `port_configs`. +/// So if `port_configs[0]` is a write and `port_configs[1]` is a read, +/// then the read port will combinatorially return data written by the +/// write port in the *same* clock cycle. However, if `port_configs[0]` +/// is a read and `port_configs[1]` is a write, then the read port will +/// not see the data written by the write port until the *next* clock cycle. +#[hdl_module] +pub fn rename_table(config: PhantomConst, port_configs: &[RenameTablePortConfig]) { + let read_count = port_configs + .iter() + .filter(|v| matches!(v, RenameTablePortConfig::Read { .. })) + .count(); + let write_count = port_configs + .iter() + .filter(|v| matches!(v, RenameTablePortConfig::Write { .. })) + .count(); + + #[hdl] + let cd: ClockDomain = m.input(); + #[hdl] + let read_ports: Array> = + m.input(Array[RenameTableReadPort[config]][read_count]); + #[hdl] + let write_ports: Array> = + m.input(Array[RenameTableWritePort[config]][write_count]); + + for read_port in read_ports { + connect(read_port.data, PRegNum[config].const_zero()); + } + + let port_configs_and_indexes = port_configs.iter().scan( + (0usize, 0), + |(read_port_index, write_port_index), port_config| { + Some(( + port_config, + match port_config { + RenameTablePortConfig::Read { .. } => { + mem::replace(read_port_index, *read_port_index + 1) + } + RenameTablePortConfig::Write { .. } => { + mem::replace(write_port_index, *write_port_index + 1) + } + }, + )) + }, + ); + + let mut range_transitions = Vec::with_capacity(port_configs.len() * 2); + for port_config in port_configs { + let (RenameTablePortConfig::Read { addr_range } + | RenameTablePortConfig::Write { addr_range }) = port_config; + range_transitions.push(addr_range.start); + range_transitions.push(addr_range.end); + } + range_transitions.sort_unstable(); + range_transitions.dedup(); + let mut last_range_transition = None; + for range_transition in range_transitions { + let Some(last_range_transition) = last_range_transition.replace(range_transition) else { + continue; + }; + let cur_addr_range = last_range_transition..range_transition; + let mut mem = memory_with_loc( + &if cur_addr_range.len() == 1 { + format!("mem_{:#x}", cur_addr_range.start) + } else { + format!("mem_{:#x}_{:#x}", cur_addr_range.start, cur_addr_range.end) + }, + PRegNum[config], + SourceLocation::caller(), + ); + mem.depth(cur_addr_range.len()); + let addr_in_range = |addr: Expr| { + if cur_addr_range.len() == 1 { + addr.value.cmp_eq(cur_addr_range.start) + } else { + addr.value.cmp_ge(cur_addr_range.start) & addr.value.cmp_lt(cur_addr_range.end) + } + }; + for (port_config, port_index) in port_configs_and_indexes.clone() { + match port_config { + RenameTablePortConfig::Read { addr_range } => { + if range_intersection(&addr_range, &cur_addr_range).is_none() { + continue; + } + let port = read_ports[port_index]; + #[hdl] + let ReadStruct::<_, _> { + addr, + en, + clk, + data, + } = mem.new_read_port(); + connect_any(addr, port.addr.value - cur_addr_range.start); + connect(en, addr_in_range(port.addr)); + connect(clk, cd.clk); + #[hdl] + if en { + connect(port.data, data); + } + } + RenameTablePortConfig::Write { addr_range } => { + if range_intersection(&addr_range, &cur_addr_range).is_none() { + continue; + } + let port = write_ports[port_index]; + #[hdl] + let WriteStruct::<_, _> { + addr, + en, + clk, + data, + mask, + } = mem.new_write_port(); + connect_any(addr, port.addr.value - cur_addr_range.start); + connect(en, addr_in_range(port.addr)); + connect(clk, cd.clk); + connect(data, port.data); + connect(mask, splat_mask(Expr::ty(port).data, true.to_expr())); + } + } + } + } + for (port_config_index, (port_config, port_index)) in + port_configs_and_indexes.clone().enumerate() + { + let RenameTablePortConfig::Read { addr_range } = port_config else { + continue; + }; + let port = read_ports[port_index]; + for (prev_port_config, prev_port_index) in + port_configs_and_indexes.clone().take(port_config_index) + { + let RenameTablePortConfig::Write { + addr_range: prev_addr_range, + } = prev_port_config + else { + continue; + }; + if range_intersection(addr_range, prev_addr_range).is_none() { + continue; + } + let prev_port = write_ports[prev_port_index]; + #[hdl] + if prev_port.addr.cmp_eq(port.addr) { + connect(port.data, prev_port.data); + } + } + } +} diff --git a/crates/cpu/src/retire_queue.rs b/crates/cpu/src/retire_queue.rs new file mode 100644 index 0000000..7fccb87 --- /dev/null +++ b/crates/cpu/src/retire_queue.rs @@ -0,0 +1,33 @@ +// SPDX-License-Identifier: LGPL-3.0-or-later +// See Notices.txt for copyright information + +use crate::{ + config::{CpuConfig, CpuConfigType}, + instruction::{MOpDestReg, PRegNum}, + unit::RetireQueueIndex, +}; +use fayalite::prelude::*; + +#[hdl(no_static)] +pub struct RenameRetireInterface { + pub start_retire_queue_index: RetireQueueIndex, + #[hdl(flip)] + pub end_retire_queue_index: RetireQueueIndex, +} + +#[hdl(no_static)] +pub struct RetireQueueEntry { + pub mop_dest: MOpDestReg, + pub renamed_dest: PRegNum, +} + +type C = PhantomConst; + +#[hdl_module] +pub fn retire_queue(config: PhantomConst) { + #[hdl] + let cd: ClockDomain = m.input(); + #[hdl] + let rename_retire_interface: RenameRetireInterface = m.output(RenameRetireInterface[config]); + todo!(); +} diff --git a/crates/cpu/src/unit.rs b/crates/cpu/src/unit.rs index cc11c55..80bde55 100644 --- a/crates/cpu/src/unit.rs +++ b/crates/cpu/src/unit.rs @@ -2,19 +2,21 @@ // See Notices.txt for copyright information use crate::{ - config::CpuConfig, + config::{CpuConfig, CpuConfigType, RetireQueueIndexWidth, UnitCount}, instruction::{ mop_enum, AluBranchMOp, LoadStoreMOp, MOp, MOpDestReg, MOpInto, MOpRegNum, MOpTrait, RenamedMOp, UnitOutRegNum, }, register::{FlagsMode, PRegValue}, - unit::unit_base::UnitToRegAlloc, }; use fayalite::{ bundle::{Bundle, BundleType}, + int::BoolOrIntType, intern::{Intern, Interned}, prelude::*, + util::ready_valid::ReadyValid, }; +use serde::{Deserialize, Serialize}; pub mod alu_branch; pub mod unit_base; @@ -36,7 +38,7 @@ macro_rules! all_units { } ) => { $(#[$enum_meta])* - #[derive(Copy, Clone, Eq, PartialEq, Ord, PartialOrd, Hash, Debug)] + #[derive(Copy, Clone, Eq, PartialEq, Ord, PartialOrd, Hash, Debug, Serialize, Deserialize)] $vis enum $UnitKind { $( $(#[$variant_meta])* @@ -45,7 +47,7 @@ macro_rules! all_units { } impl $UnitKind { - pub fn unit(self, config: &CpuConfig, unit_index: usize) -> DynUnit { + pub fn unit(self, config: PhantomConst, unit_index: usize) -> DynUnit { match self { $($UnitKind::$Unit => $create_dyn_unit_fn(config, unit_index),)* } @@ -204,23 +206,28 @@ macro_rules! all_units { })* }; - $(impl<$DestReg: Type, $SrcRegWidth: Size> MOpInto> for $BeforeOp { - fn mop_into_ty(self) -> RenamedMOp<$DestReg, $SrcRegWidth> { - RenamedMOp[MOpTrait::dest_reg_ty(self)][MOpTrait::src_reg_width(self)] - } - fn mop_into(this: Expr) -> Expr> { - MOpInto::>::mop_into_ty(Expr::ty(this)).$BeforeUnit(this) - } - })* + const _: () = { + #[hdl] + type $DestReg = (); - $(impl<$DestReg: Type, $SrcRegWidth: Size> MOpInto> for $AfterOp { - fn mop_into_ty(self) -> RenamedMOp<$DestReg, $SrcRegWidth> { - RenamedMOp[MOpTrait::dest_reg_ty(self)][MOpTrait::src_reg_width(self)] - } - fn mop_into(this: Expr) -> Expr> { - MOpInto::>::mop_into_ty(Expr::ty(this)).$AfterUnit(this) - } - })* + $(impl<$SrcRegWidth: Size> MOpInto> for $BeforeOp { + fn mop_into_ty(self) -> RenamedMOp<$SrcRegWidth> { + RenamedMOp[MOpTrait::src_reg_width(self)] + } + fn mop_into(this: Expr) -> Expr> { + MOpInto::>::mop_into_ty(Expr::ty(this)).$BeforeUnit(this) + } + })* + + $(impl<$SrcRegWidth: Size> MOpInto> for $AfterOp { + fn mop_into_ty(self) -> RenamedMOp<$SrcRegWidth> { + RenamedMOp[MOpTrait::src_reg_width(self)] + } + fn mop_into(this: Expr) -> Expr> { + MOpInto::>::mop_into_ty(Expr::ty(this)).$AfterUnit(this) + } + })* + }; }; } @@ -253,18 +260,80 @@ pub struct GlobalState { pub flags_mode: FlagsMode, } +/// index into the retire queue (the queue of instructions that haven't yet retired) +#[hdl(cmp_eq, no_static)] +pub struct RetireQueueIndex { + /// increases by one for each instruction added to the retire queue. + /// + /// this wraps around, so you must not compare it using `cmp_lt`/`cmp_gt` + /// but instead must use [`Self::insns_until`] and compare the output with zero. + pub index: UIntType>, + pub config: C, +} + +impl RetireQueueIndex { + pub fn insns_until( + this: impl ToExpr, + target: impl ToExpr, + ) -> Expr>> { + let this = this.to_expr(); + let target = target.to_expr(); + assert_eq!(Expr::ty(this), Expr::ty(target)); + (this.index - target.index).cast_to(Expr::ty(this).index.as_same_width_sint()) + } +} + +#[hdl(no_static)] +pub struct RenamedInsnData { + pub retire_queue_index: RetireQueueIndex, + pub pc: UInt<64>, + pub dest: DestReg, + pub mop: MOp, +} + +#[hdl(no_static)] +pub struct UnitForwardingInfo { + pub unit_output_writes: ArrayType>, UnitCount>, + pub unit_reg_frees: ArrayType>, UnitCount>, +} + +#[hdl(no_static)] +pub struct UnitToRegAlloc { + #[hdl(flip)] + pub unit_forwarding_info: UnitForwardingInfo, + #[hdl(flip)] + pub input: ReadyValid>>, + #[hdl(flip)] + pub cancel_input: HdlOption>, + pub output: HdlOption>, + pub ready_for_retire_queue_index: HdlOption>, +} + +impl UnitToRegAlloc { + pub fn mop_ty(self) -> MOp { + self.input.data.HdlSome.mop + } + pub fn extra_out_ty(self) -> ExtraOut { + self.output.HdlSome.extra_out_ty() + } +} + #[hdl(cmp_eq)] pub struct UnitResultCompleted { pub value: PRegValue, pub extra_out: ExtraOut, } -#[hdl(cmp_eq)] -pub struct UnitOutputWrite { - pub which: UnitOutRegNum, +#[hdl(cmp_eq, no_static)] +pub struct UnitOutputWrite { + pub dest: UnitOutRegNum, pub value: PRegValue, } +#[hdl] +pub type UnitOutputWrites = + ArrayType>, UnitCount>; + #[hdl(cmp_eq)] pub struct TrapData { // TODO @@ -282,21 +351,31 @@ impl UnitResult { } } -#[hdl] -pub struct UnitOutput { - pub which: UnitOutRegNum, +#[hdl(no_static)] +pub struct UnitOutput { + pub dest: UnitOutRegNum, + pub retire_queue_index: RetireQueueIndex, pub result: UnitResult, } -impl UnitOutput { +impl UnitOutput { pub fn extra_out_ty(self) -> ExtraOut { self.result.extra_out_ty() } } -#[hdl(cmp_eq)] -pub struct UnitCancelInput { - pub which: UnitOutRegNum, +#[hdl(cmp_eq, no_static)] +pub struct UnitCancelInput { + pub target: RetireQueueIndex, +} + +impl UnitCancelInput { + pub fn is_canceled( + this: impl ToExpr, + insn_retire_queue_index: impl ToExpr>, + ) -> Expr { + RetireQueueIndex::insns_until(insn_retire_queue_index, this.to_expr().target).cmp_ge(0i8) + } } pub trait UnitTrait: @@ -312,17 +391,14 @@ pub trait UnitTrait: fn unit_kind(&self) -> UnitKind; - fn extract_mop( - &self, - mop: Expr, DynSize>>, - ) -> Expr>; + fn extract_mop(&self, mop: Expr>) -> Expr>; fn module(&self) -> Interned>; fn unit_to_reg_alloc( &self, this: Expr, - ) -> Expr>; + ) -> Expr, Self::MOp, Self::ExtraOut>>; fn cd(&self, this: Expr) -> Expr; @@ -370,10 +446,7 @@ impl UnitTrait for DynUnit { self.unit_kind } - fn extract_mop( - &self, - mop: Expr, DynSize>>, - ) -> Expr> { + fn extract_mop(&self, mop: Expr>) -> Expr> { self.unit.extract_mop(mop) } @@ -384,7 +457,7 @@ impl UnitTrait for DynUnit { fn unit_to_reg_alloc( &self, this: Expr, - ) -> Expr> { + ) -> Expr, Self::MOp, Self::ExtraOut>> { self.unit.unit_to_reg_alloc(this) } @@ -425,10 +498,7 @@ impl UnitTrait for DynUnitWrapper, DynSize>>, - ) -> Expr> { + fn extract_mop(&self, mop: Expr>) -> Expr> { Expr::from_enum(Expr::as_enum(self.0.extract_mop(mop))) } @@ -439,7 +509,7 @@ impl UnitTrait for DynUnitWrapper, - ) -> Expr> { + ) -> Expr, Self::MOp, Self::ExtraOut>> { Expr::from_bundle(Expr::as_bundle( self.0.unit_to_reg_alloc(Expr::from_bundle(this)), )) diff --git a/crates/cpu/src/unit/alu_branch.rs b/crates/cpu/src/unit/alu_branch.rs index 082fd8d..63d85af 100644 --- a/crates/cpu/src/unit/alu_branch.rs +++ b/crates/cpu/src/unit/alu_branch.rs @@ -2,29 +2,26 @@ // See Notices.txt for copyright information use crate::{ - config::CpuConfig, + config::{CpuConfig, PRegNumWidth}, instruction::{ - AddSubMOp, AluBranchMOp, AluCommonMOp, CommonMOp, LogicalMOp, MOpTrait, OutputIntegerMode, - RenamedMOp, UnitOutRegNum, COMMON_MOP_SRC_LEN, + AddSubMOp, AluBranchMOp, AluCommonMOp, CommonMOp, LogicalMOp, OutputIntegerMode, + RenamedMOp, COMMON_MOP_SRC_LEN, }, register::{FlagsMode, PRegFlagsPowerISA, PRegFlagsX86, PRegValue}, unit::{ - unit_base::{unit_base, ExecuteEnd, ExecuteStart, UnitToRegAlloc}, + unit_base::{unit_base, ExecuteEnd, ExecuteStart}, DynUnit, DynUnitWrapper, GlobalState, UnitKind, UnitMOp, UnitOutput, UnitResult, - UnitResultCompleted, UnitTrait, + UnitResultCompleted, UnitToRegAlloc, UnitTrait, }, }; use fayalite::{ - intern::{Intern, Interned}, - module::wire_with_loc, - prelude::*, - util::ready_valid::ReadyValid, + intern::Interned, module::wire_with_loc, prelude::*, util::ready_valid::ReadyValid, }; use std::{collections::HashMap, ops::RangeTo}; #[hdl] fn add_sub( - mop: Expr, DynSize, SrcCount>>, + mop: Expr>, pc: Expr>, flags_mode: Expr, src_values: Expr>, @@ -232,7 +229,7 @@ fn add_sub( #[hdl] fn logical( - mop: Expr, DynSize>>, + mop: Expr>, flags_mode: Expr, src_values: Expr>, ) -> Expr> { @@ -245,20 +242,15 @@ fn logical( } #[hdl_module] -pub fn alu_branch(config: &CpuConfig, unit_index: usize) { +pub fn alu_branch(config: PhantomConst, unit_index: usize) { #[hdl] let cd: ClockDomain = m.input(); #[hdl] let unit_to_reg_alloc: UnitToRegAlloc< - AluBranchMOp, DynSize>, + PhantomConst, + AluBranchMOp<(), DynSize>, (), - DynSize, - DynSize, - DynSize, - > = m.output(config.unit_to_reg_alloc( - AluBranchMOp[config.unit_out_reg_num()][config.p_reg_num_width()], - (), - )); + > = m.output(UnitToRegAlloc[config][AluBranchMOp[()][PRegNumWidth[config]]][()]); #[hdl] let global_state: GlobalState = m.input(); @@ -279,13 +271,9 @@ pub fn alu_branch(config: &CpuConfig, unit_index: usize) { #[hdl] if let HdlSome(execute_start) = ReadyValid::firing_data(unit_base.execute_start) { #[hdl] - let ExecuteStart::<_> { - mop, - pc, - src_values, - } = execute_start; + let ExecuteStart::<_, _> { insn, src_values } = execute_start; #[hdl] - match mop { + match insn.mop { AluBranchMOp::<_, _>::AddSub(mop) => connect( unit_base.execute_end, HdlSome( @@ -293,10 +281,11 @@ pub fn alu_branch(config: &CpuConfig, unit_index: usize) { ExecuteEnd::<_, _> { unit_output: #[hdl] UnitOutput::<_, _> { - which: MOpTrait::dest_reg(mop), + dest: insn.dest, + retire_queue_index: insn.retire_queue_index, result: UnitResult[()].Completed(add_sub( mop, - pc, + insn.pc, global_state.flags_mode, src_values, )), @@ -311,10 +300,11 @@ pub fn alu_branch(config: &CpuConfig, unit_index: usize) { ExecuteEnd::<_, _> { unit_output: #[hdl] UnitOutput::<_, _> { - which: MOpTrait::dest_reg(mop), + dest: insn.dest, + retire_queue_index: insn.retire_queue_index, result: UnitResult[()].Completed(add_sub( mop, - pc, + insn.pc, global_state.flags_mode, src_values, )), @@ -329,7 +319,8 @@ pub fn alu_branch(config: &CpuConfig, unit_index: usize) { ExecuteEnd::<_, _> { unit_output: #[hdl] UnitOutput::<_, _> { - which: MOpTrait::dest_reg(mop), + dest: insn.dest, + retire_queue_index: insn.retire_queue_index, result: UnitResult[()].Completed(logical( mop, global_state.flags_mode, @@ -345,14 +336,14 @@ pub fn alu_branch(config: &CpuConfig, unit_index: usize) { #[derive(Debug, Copy, Clone, PartialEq, Eq, Hash)] pub struct AluBranch { - config: Interned, + config: PhantomConst, module: Interned>, } impl AluBranch { - pub fn new(config: &CpuConfig, unit_index: usize) -> Self { + pub fn new(config: PhantomConst, unit_index: usize) -> Self { Self { - config: config.intern(), + config, module: alu_branch(config, unit_index), } } @@ -361,7 +352,7 @@ impl AluBranch { impl UnitTrait for AluBranch { type Type = alu_branch; type ExtraOut = (); - type MOp = AluBranchMOp, DynSize>; + type MOp = AluBranchMOp<(), DynSize>; fn ty(&self) -> Self::Type { self.module.io_ty() @@ -379,10 +370,7 @@ impl UnitTrait for AluBranch { UnitKind::AluBranch } - fn extract_mop( - &self, - mop: Expr, DynSize>>, - ) -> Expr> { + fn extract_mop(&self, mop: Expr>) -> Expr> { UnitMOp::alu_branch_mop(mop) } @@ -393,7 +381,7 @@ impl UnitTrait for AluBranch { fn unit_to_reg_alloc( &self, this: Expr, - ) -> Expr> { + ) -> Expr, Self::MOp, Self::ExtraOut>> { this.unit_to_reg_alloc } diff --git a/crates/cpu/src/unit/unit_base.rs b/crates/cpu/src/unit/unit_base.rs index 9a3d0d8..141ca90 100644 --- a/crates/cpu/src/unit/unit_base.rs +++ b/crates/cpu/src/unit/unit_base.rs @@ -2,72 +2,31 @@ // See Notices.txt for copyright information use crate::{ - config::CpuConfig, + config::{CpuConfig, CpuConfigType, UnitOutRegNumWidth}, instruction::{MOpTrait, PRegNum, UnitNum, UnitOutRegNum, COMMON_MOP_SRC_LEN}, register::PRegValue, - unit::{UnitCancelInput, UnitOutput, UnitOutputWrite}, - util::tree_reduce::tree_reduce, + unit::{ + RenamedInsnData, RetireQueueIndex, UnitCancelInput, UnitForwardingInfo, UnitOutput, + UnitOutputWrite, UnitToRegAlloc, + }, }; use fayalite::{ memory::splat_mask, module::{memory_with_loc, wire_with_loc}, prelude::*, ty::StaticType, - util::ready_valid::ReadyValid, + util::{prefix_sum::reduce, ready_valid::ReadyValid}, }; -use std::marker::PhantomData; -#[hdl] -pub struct UnitForwardingInfo { - pub unit_output_writes: ArrayType>, UnitCount>, - pub unit_reg_frees: ArrayType>, UnitCount>, - pub _phantom: PhantomData, -} - -#[hdl] -pub struct UnitInput { - pub mop: MOp, - pub pc: UInt<64>, -} - -#[hdl] -pub struct UnitToRegAlloc< - MOp: Type, - ExtraOut: Type, - UnitNumWidth: Size, - OutRegNumWidth: Size, - UnitCount: Size, -> { - #[hdl(flip)] - pub unit_forwarding_info: UnitForwardingInfo, - #[hdl(flip)] - pub input: ReadyValid>, - #[hdl(flip)] - pub cancel_input: HdlOption>, - pub output: HdlOption>, -} - -impl - UnitToRegAlloc -{ - pub fn mop_ty(self) -> MOp { - self.input.data.HdlSome.mop - } - pub fn extra_out_ty(self) -> ExtraOut { - self.output.HdlSome.extra_out_ty() - } -} - -#[hdl] -pub struct ExecuteStart>> { - pub mop: MOp, - pub pc: UInt<64>, +#[hdl(no_static)] +pub struct ExecuteStart { + pub insn: RenamedInsnData>, pub src_values: Array, } -#[hdl] -pub struct ExecuteEnd { - pub unit_output: UnitOutput, +#[hdl(no_static)] +pub struct ExecuteEnd { + pub unit_output: UnitOutput, } #[hdl] @@ -147,27 +106,34 @@ impl InFlightOpState { } } -#[hdl] -struct InFlightOp { +#[hdl(no_static)] +struct InFlightOp { state: InFlightOpState, - mop: MOp, - pc: UInt<64>, + insn: RenamedInsnData>, src_ready_flags: Array, } -#[hdl] -struct InFlightOpsSummary { - empty_op_index: HdlOption>, - ready_op_index: HdlOption>, +impl InFlightOp { + fn config(self) -> C { + self.insn.retire_queue_index.config + } } -impl InFlightOpsSummary { +#[hdl(no_static)] +struct InFlightOpsSummary { + empty_op_index: HdlOption>, + ready_op_index: HdlOption>, + ready_for_retire_queue_index: HdlOption>, +} + +impl InFlightOpsSummary { #[hdl] fn new( op_index: usize, op_index_ty: UIntType, - in_flight_op: impl ToExpr>>, + in_flight_op: impl ToExpr>>, ) -> Expr { + let in_flight_op = in_flight_op.to_expr(); let empty_op_index = wire_with_loc( &format!("empty_op_index_{op_index}"), SourceLocation::caller(), @@ -180,34 +146,73 @@ impl InFlightOpsSummary { HdlOption[op_index_ty], ); connect(ready_op_index, HdlOption[op_index_ty].HdlNone()); + let ready_for_retire_queue_index = wire_with_loc( + &format!("ready_for_retire_queue_index_{op_index}"), + SourceLocation::caller(), + HdlOption[RetireQueueIndex[Expr::ty(in_flight_op).HdlSome.config()]], + ); + connect( + ready_for_retire_queue_index, + Expr::ty(ready_for_retire_queue_index).HdlNone(), + ); #[hdl] if let HdlSome(in_flight_op) = in_flight_op { #[hdl] - let InFlightOp::<_> { + let InFlightOp::<_, _> { state, - mop: _, - pc: _, + insn, src_ready_flags, } = in_flight_op; connect(ready_op_index, HdlOption[op_index_ty].HdlNone()); + let ready_for_retire_queue_index_value = wire_with_loc( + &format!("ready_for_retire_queue_index_value_{op_index}"), + SourceLocation::caller(), + RetireQueueIndex[Expr::ty(in_flight_op).config()], + ); + connect(ready_for_retire_queue_index_value, insn.retire_queue_index); + // TODO: don't subtract one from `.index` when instruction is ready to + // do something at retire time (e.g. non-speculative stores) + // -- that will need a new InFlightOpState variant. + connect_any( + ready_for_retire_queue_index_value.index, + // subtract one -- this instruction isn't ready to retire, + // but the previous one could be + insn.retire_queue_index.index - 1u8, + ); + #[hdl] match state { - InFlightOpState::Ready => - { + InFlightOpState::Ready => { #[hdl] if src_ready_flags.cmp_eq([true; COMMON_MOP_SRC_LEN]) { connect(ready_op_index, HdlSome(op_index.cast_to(op_index_ty))); } + connect( + ready_for_retire_queue_index, + HdlSome(ready_for_retire_queue_index_value), + ); + } + InFlightOpState::CanceledAndRunning => { + // the instruction has been canceled, but is still + // executing so we treat it as if it doesn't exist + // other than making sure the in_flight_op slot + // isn't reused until execution is done. + } + InFlightOpState::Running => { + connect( + ready_for_retire_queue_index, + HdlSome(ready_for_retire_queue_index_value), + ); } - InFlightOpState::CanceledAndRunning | InFlightOpState::Running => {} } } else { connect(empty_op_index, HdlSome(op_index.cast_to(op_index_ty))); } #[hdl] - InFlightOpsSummary::<_> { + InFlightOpsSummary::<_, _> { empty_op_index, ready_op_index, + ready_for_retire_queue_index, } } #[hdl] @@ -215,22 +220,60 @@ impl InFlightOpsSummary { let l = l.to_expr(); let r = r.to_expr(); #[hdl] - InFlightOpsSummary::<_> { + let combine_ready_for_retire_queue_index = wire(Expr::ty(l).ready_for_retire_queue_index); + #[hdl] + if let HdlSome(l_ready_for_retire_queue_index) = l.ready_for_retire_queue_index { + #[hdl] + if let HdlSome(r_ready_for_retire_queue_index) = r.ready_for_retire_queue_index { + #[hdl] + if RetireQueueIndex::insns_until( + l_ready_for_retire_queue_index, + r_ready_for_retire_queue_index, + ) + .cmp_lt(0i8) + { + connect( + combine_ready_for_retire_queue_index, + l.ready_for_retire_queue_index, + ); + } else { + connect( + combine_ready_for_retire_queue_index, + r.ready_for_retire_queue_index, + ); + } + } else { + connect( + combine_ready_for_retire_queue_index, + l.ready_for_retire_queue_index, + ); + } + } else { + connect( + combine_ready_for_retire_queue_index, + r.ready_for_retire_queue_index, + ); + } + #[hdl] + InFlightOpsSummary::<_, _> { empty_op_index: HdlOption::or(l.empty_op_index, r.empty_op_index), ready_op_index: HdlOption::or(l.ready_op_index, r.ready_op_index), + ready_for_retire_queue_index: combine_ready_for_retire_queue_index, } } } -impl InFlightOpsSummary { +impl InFlightOpsSummary, DynSize> { fn summarize( - in_flight_ops: impl ToExpr>, MaxInFlight>>, + in_flight_ops: impl ToExpr< + Type = ArrayType, MOp>>, MaxInFlight>, + >, ) -> Expr { let in_flight_ops = in_flight_ops.to_expr(); let max_in_flight = Expr::ty(in_flight_ops).len(); let index_range = 0..max_in_flight; let index_ty = UInt::range(index_range.clone()); - tree_reduce( + reduce( index_range.map(|i| Self::new(i, index_ty, in_flight_ops[i])), Self::combine, ) @@ -239,11 +282,8 @@ impl InFlightOpsSummary { } #[hdl_module] -pub fn unit_base< - MOp: Type + MOpTrait, SrcRegWidth = DynSize>, - ExtraOut: Type, ->( - config: &CpuConfig, +pub fn unit_base, ExtraOut: Type>( + config: PhantomConst, unit_index: usize, mop_ty: MOp, extra_out_ty: ExtraOut, @@ -251,18 +291,19 @@ pub fn unit_base< #[hdl] let cd: ClockDomain = m.input(); #[hdl] - let unit_to_reg_alloc: UnitToRegAlloc = - m.output(config.unit_to_reg_alloc(mop_ty, extra_out_ty)); + let unit_to_reg_alloc: UnitToRegAlloc, MOp, ExtraOut> = + m.output(UnitToRegAlloc[config][mop_ty][extra_out_ty]); #[hdl] - let execute_start: ReadyValid> = m.output(ReadyValid[ExecuteStart[mop_ty]]); + let execute_start: ReadyValid, MOp>> = + m.output(ReadyValid[ExecuteStart[config][mop_ty]]); #[hdl] - let execute_end: HdlOption> = - m.input(HdlOption[ExecuteEnd[config.out_reg_num_width][extra_out_ty]]); + let execute_end: HdlOption, ExtraOut>> = + m.input(HdlOption[ExecuteEnd[config][extra_out_ty]]); connect(execute_start.data, Expr::ty(execute_start).data.HdlNone()); - let max_in_flight = config.unit_max_in_flight(unit_index).get(); - let in_flight_op_ty = InFlightOp[mop_ty]; + let max_in_flight = config.get().unit_max_in_flight(unit_index).get(); + let in_flight_op_ty = InFlightOp[config][mop_ty]; #[hdl] let in_flight_ops = reg_builder() .clock_domain(cd) @@ -278,17 +319,21 @@ pub fn unit_base< HdlOption::is_some(in_flight_ops_summary.empty_op_index), ); + connect( + unit_to_reg_alloc.ready_for_retire_queue_index, + in_flight_ops_summary.ready_for_retire_queue_index, + ); + #[hdl] - let UnitForwardingInfo::<_, _, _> { + let UnitForwardingInfo::<_> { unit_output_writes, unit_reg_frees, - _phantom: _, } = unit_to_reg_alloc.unit_forwarding_info; #[hdl] let read_src_regs = wire(mop_ty.src_regs_ty()); connect( read_src_regs, - repeat(config.p_reg_num().const_zero().cast_to_bits(), ConstUsize), + repeat(PRegNum[config].const_zero().cast_to_bits(), ConstUsize), ); #[hdl] let read_src_values = wire(); @@ -297,7 +342,7 @@ pub fn unit_base< let input_src_regs = wire(mop_ty.src_regs_ty()); connect( input_src_regs, - repeat(config.p_reg_num().const_zero().cast_to_bits(), ConstUsize), + repeat(PRegNum[config].const_zero().cast_to_bits(), ConstUsize), ); #[hdl] let input_src_regs_valid = wire(); @@ -309,7 +354,7 @@ pub fn unit_base< Bool, SourceLocation::caller(), ); - mem.depth(1 << config.out_reg_num_width); + mem.depth(1 << UnitOutRegNumWidth[config]); mem }) .collect(); @@ -319,11 +364,11 @@ pub fn unit_base< PRegValue, SourceLocation::caller(), ); - unit_output_regs.depth(1 << config.out_reg_num_width); + unit_output_regs.depth(1 << UnitOutRegNumWidth[config]); for src_index in 0..COMMON_MOP_SRC_LEN { let read_port = unit_output_regs.new_read_port(); - let p_reg_num = read_src_regs[src_index].cast_bits_to(config.p_reg_num()); + let p_reg_num = read_src_regs[src_index].cast_bits_to(PRegNum[config]); connect_any(read_port.addr, p_reg_num.unit_out_reg.value); connect(read_port.en, false); connect(read_port.clk, cd.clk); @@ -336,7 +381,7 @@ pub fn unit_base< for src_index in 0..COMMON_MOP_SRC_LEN { let read_port = unit_output_regs_valid[unit_index].new_read_port(); - let p_reg_num = input_src_regs[src_index].cast_bits_to(config.p_reg_num()); + let p_reg_num = input_src_regs[src_index].cast_bits_to(PRegNum[config]); connect_any(read_port.addr, p_reg_num.unit_out_reg.value); connect(read_port.en, false); connect(read_port.clk, cd.clk); @@ -361,15 +406,15 @@ pub fn unit_base< connect(ready_write_port.mask, true); #[hdl] if let HdlSome(unit_output_write) = unit_output_writes[unit_index] { - connect_any(write_port.addr, unit_output_write.which.value); + connect_any(write_port.addr, unit_output_write.dest.value); connect(write_port.data, unit_output_write.value); connect(write_port.en, true); - connect_any(ready_write_port.addr, unit_output_write.which.value); + connect_any(ready_write_port.addr, unit_output_write.dest.value); connect(ready_write_port.en, true); let p_reg_num = #[hdl] - PRegNum::<_, _> { - unit_num: config.unit_num().from_index(unit_index), - unit_out_reg: unit_output_write.which, + PRegNum::<_> { + unit_num: UnitNum[config].from_index(unit_index), + unit_out_reg: unit_output_write.dest, }; for src_index in 0..COMMON_MOP_SRC_LEN { #[hdl] @@ -399,9 +444,8 @@ pub fn unit_base< execute_start.data, HdlSome( #[hdl] - ExecuteStart::<_> { - mop: in_flight_op.mop, - pc: in_flight_op.pc, + ExecuteStart::<_, _> { + insn: in_flight_op.insn, src_values: read_src_values, }, ), @@ -420,12 +464,17 @@ pub fn unit_base< #[hdl] if let HdlSome(input) = ReadyValid::firing_data(unit_to_reg_alloc.input) { #[hdl] - let UnitInput::<_> { mop, pc } = input; + let RenamedInsnData::<_, _, _> { + retire_queue_index, + pc: _, + dest: _, + mop, + } = input; #[hdl] let input_mop_src_regs = wire(mop_ty.src_regs_ty()); connect( input_mop_src_regs, - repeat(config.p_reg_num().const_zero().cast_to_bits(), ConstUsize), + repeat(PRegNum[config].const_zero().cast_to_bits(), ConstUsize), ); MOp::connect_src_regs(mop, input_mop_src_regs); let src_ready_flags = wire_with_loc( @@ -436,20 +485,24 @@ pub fn unit_base< connect(src_ready_flags, input_src_regs_valid); connect(input_src_regs, input_mop_src_regs); #[hdl] - if unit_to_reg_alloc.cancel_input.cmp_ne(HdlSome( - #[hdl] - UnitCancelInput::<_> { - which: MOp::dest_reg(mop), - }, - )) { + let input_is_canceled = wire(); + connect(input_is_canceled, false); + #[hdl] + if let HdlSome(cancel_input) = unit_to_reg_alloc.cancel_input { + connect( + input_is_canceled, + UnitCancelInput::is_canceled(cancel_input, retire_queue_index), + ); + } + #[hdl] + if !input_is_canceled { connect( input_in_flight_op, HdlSome( #[hdl] - InFlightOp::<_> { + InFlightOp::<_, _> { state: InFlightOpState.Ready(), - mop, - pc, + insn: input, src_ready_flags, }, ), @@ -483,13 +536,11 @@ pub fn unit_base< #[hdl] if let HdlSome(in_flight_op) = in_flight_ops[in_flight_op_index] { #[hdl] - let InFlightOp::<_> { + let InFlightOp::<_, _> { state, - mop, - pc, + insn, src_ready_flags, } = in_flight_op; - let which = MOp::dest_reg(mop); let src_regs = wire_with_loc( &format!("in_flight_op_src_regs_{in_flight_op_index}"), SourceLocation::caller(), @@ -497,9 +548,9 @@ pub fn unit_base< ); connect( src_regs, - repeat(config.p_reg_num().const_zero().cast_to_bits(), ConstUsize), + repeat(PRegNum[config].const_zero().cast_to_bits(), ConstUsize), ); - MOp::connect_src_regs(mop, src_regs); + MOp::connect_src_regs(insn.mop, src_regs); #[hdl] if in_flight_ops_summary.ready_op_index.cmp_eq(HdlSome( @@ -517,12 +568,12 @@ pub fn unit_base< if let HdlSome(unit_output_write) = unit_output_writes[unit_index] { #[hdl] let UnitOutputWrite::<_> { - which: unit_out_reg, + dest: unit_out_reg, value: _, } = unit_output_write; let p_reg_num = #[hdl] - PRegNum::<_, _> { - unit_num: config.unit_num().from_index(unit_index), + PRegNum::<_> { + unit_num: UnitNum[config].from_index(unit_index), unit_out_reg, }; for src_index in 0..COMMON_MOP_SRC_LEN { @@ -537,20 +588,21 @@ pub fn unit_base< } } - connect( - in_flight_op_canceling[in_flight_op_index], - unit_to_reg_alloc.cancel_input.cmp_eq(HdlSome( - #[hdl] - UnitCancelInput::<_> { which }, - )), - ); + connect(in_flight_op_canceling[in_flight_op_index], false); + #[hdl] + if let HdlSome(cancel_input) = unit_to_reg_alloc.cancel_input { + connect( + in_flight_op_canceling[in_flight_op_index], + UnitCancelInput::is_canceled(cancel_input, insn.retire_queue_index), + ); + } #[hdl] if let HdlSome(execute_end) = execute_end { #[hdl] let ExecuteEnd::<_, _> { unit_output } = execute_end; #[hdl] - if which.cmp_eq(unit_output.which) { + if insn.dest.cmp_eq(unit_output.dest) { connect(in_flight_op_execute_ending[in_flight_op_index], true); #[hdl] if !in_flight_op_canceling[in_flight_op_index] { @@ -567,7 +619,7 @@ pub fn unit_base< #[hdl] if let HdlSome(execute_start) = ReadyValid::firing_data(execute_start) { #[hdl] - if which.cmp_eq(MOp::dest_reg(execute_start.mop)) { + if insn.dest.cmp_eq(execute_start.insn.dest) { connect(in_flight_op_execute_starting[in_flight_op_index], true); } } @@ -594,10 +646,9 @@ pub fn unit_base< in_flight_ops[in_flight_op_index], HdlSome( #[hdl] - InFlightOp::<_> { + InFlightOp::<_, _> { state, - mop, - pc, + insn, src_ready_flags: in_flight_op_next_src_ready_flags[in_flight_op_index], }, ), diff --git a/crates/cpu/src/util.rs b/crates/cpu/src/util.rs index 0b53274..f57003a 100644 --- a/crates/cpu/src/util.rs +++ b/crates/cpu/src/util.rs @@ -2,7 +2,6 @@ // See Notices.txt for copyright information pub mod array_vec; -pub mod tree_reduce; pub(crate) const fn range_u32_len(range: &std::ops::Range) -> usize { let retval = range.end.saturating_sub(range.start); @@ -25,3 +24,16 @@ pub(crate) const fn range_u32_nth_or_panic(range: &std::ops::Range, index: panic!("index out of range") } } + +pub(crate) const fn range_intersection( + a: &std::ops::Range, + b: &std::ops::Range, +) -> Option> { + let start = if a.start > b.start { a.start } else { b.start }; + let end = if a.end < b.end { a.end } else { b.end }; + if start < end { + Some(start..end) + } else { + None + } +} diff --git a/crates/cpu/src/util/array_vec.rs b/crates/cpu/src/util/array_vec.rs index 761f53f..be256b4 100644 --- a/crates/cpu/src/util/array_vec.rs +++ b/crates/cpu/src/util/array_vec.rs @@ -2,8 +2,11 @@ // See Notices.txt for copyright information use fayalite::{ - expr::ops::{ExprCastTo, ExprIndex, ExprPartialEq, ExprPartialOrd}, - int::SizeType, + expr::{ + ops::{ExprCastTo, ExprIndex, ExprPartialEq, ExprPartialOrd}, + ToLiteralBits, + }, + int::{IntType, SizeType}, intern::{Intern, Interned}, prelude::*, ty::{MatchVariantWithoutScope, StaticType, TypeProperties}, @@ -249,6 +252,29 @@ impl ArrayVec { }); array_vec_as_array_of_options } + #[hdl] + pub fn get>( + this: impl ToExpr, + index: impl ToExpr, + ) -> Expr> { + let this = this.to_expr(); + let index = Expr::as_dyn_int(index.to_expr()); + let never_in_bounds = index.cmp_ge(Expr::ty(this).capacity()); + if let Ok(never_in_bounds) = never_in_bounds.to_literal_bits() { + if never_in_bounds[0] { + // avoid error from out-of-bounds constant index + return HdlOption[Expr::ty(this).element()].HdlNone(); + } + } + #[hdl] + let array_vec_get = wire(HdlOption[Expr::ty(this).element()]); + connect(array_vec_get, Expr::ty(array_vec_get).HdlNone()); + #[hdl] + if index.cmp_lt(Length::as_uint(Self::len(this))) { + connect(array_vec_get, HdlSome(this.elements[index])); + } + array_vec_get + } } impl ExprIndex for ArrayVec @@ -263,3 +289,35 @@ where as ExprIndex>::expr_index(&this.elements, index) } } + +#[hdl] +pub struct ReadyValidArray { + pub data: ArrayVec, + #[hdl(flip)] + pub ready: Length, +} + +impl ReadyValidArray { + #[hdl] + pub fn firing_len(this: impl ToExpr) -> Expr> { + let this = this.to_expr(); + assert_eq!(Expr::ty(this).data.len_ty(), Expr::ty(this).ready); + #[hdl] + let firing_len = wire(Expr::ty(this).data.len); + connect(firing_len, this.data.len); + #[hdl] + if this.data.len.cmp_gt(this.ready) { + connect(firing_len, this.ready); + } + firing_len + } + #[hdl] + pub fn firing_data(this: impl ToExpr) -> Expr> { + let this = this.to_expr(); + #[hdl] + let firing_data = wire(Expr::ty(this).data); + connect(firing_data, this.data); + connect(firing_data.len, Self::firing_len(this)); + firing_data + } +} diff --git a/crates/cpu/src/util/tree_reduce.rs b/crates/cpu/src/util/tree_reduce.rs deleted file mode 100644 index c8d12f7..0000000 --- a/crates/cpu/src/util/tree_reduce.rs +++ /dev/null @@ -1,152 +0,0 @@ -// SPDX-License-Identifier: LGPL-3.0-or-later -// See Notices.txt for copyright information -#[derive(Copy, Clone, PartialEq, Eq, Hash, Debug)] -pub enum TreeReduceOp { - Input, - Reduce, -} - -#[derive(Copy, Clone, Debug)] -struct Entry { - start: usize, - depth: u32, -} - -#[derive(Clone, Debug)] -pub struct TreeReduceOps { - len: usize, - stack: Vec, -} - -impl TreeReduceOps { - pub fn new(len: usize) -> Self { - TreeReduceOps { - len, - stack: Vec::new(), - } - } -} - -impl Iterator for TreeReduceOps { - type Item = TreeReduceOp; - fn next(&mut self) -> Option { - match *self.stack { - [] if self.len != 0 => { - self.stack.push(Entry { start: 0, depth: 0 }); - Some(TreeReduceOp::Input) - } - [.., ref mut second_last, last] if second_last.depth == last.depth => { - second_last.depth += 1; - self.stack.pop(); - Some(TreeReduceOp::Reduce) - } - [.., last] if self.len - last.start > 1 << last.depth => { - let start = last.start + (1 << last.depth); - self.stack.push(Entry { start, depth: 0 }); - Some(TreeReduceOp::Input) - } - [.., ref mut second_last, _] => { - second_last.depth += 1; - self.stack.pop(); - Some(TreeReduceOp::Reduce) - } - _ => None, - } - } -} - -#[track_caller] -pub fn tree_reduce_with_state( - iter: impl IntoIterator, - state: &mut S, - mut input: impl FnMut(&mut S, I) -> R, - mut reduce: impl FnMut(&mut S, R, R) -> R, -) -> Option { - let mut stack = Vec::new(); - let mut iter = iter.into_iter(); - for op in TreeReduceOps::new(iter.len()) { - match op { - TreeReduceOp::Input => stack.push(input( - state, - iter.next().expect("inconsistent iterator len() and next()"), - )), - TreeReduceOp::Reduce => { - let Some(r) = stack.pop() else { - unreachable!(); - }; - let Some(l) = stack.pop() else { - unreachable!(); - }; - stack.push(reduce(state, l, r)); - } - } - } - stack.pop() -} - -pub fn tree_reduce( - iter: impl IntoIterator, - mut reduce: impl FnMut(T, T) -> T, -) -> Option { - tree_reduce_with_state(iter, &mut (), |_, v| v, move |_, l, r| reduce(l, r)) -} - -#[cfg(test)] -mod tests { - use super::*; - use std::ops::Range; - - fn recursive_tree_reduce(range: Range, ops: &mut Vec) { - if range.len() == 1 { - ops.push(TreeReduceOp::Input); - return; - } - if range.is_empty() { - return; - } - let pow2_len = range.len().next_power_of_two(); - let split = range.start + pow2_len / 2; - recursive_tree_reduce(range.start..split, ops); - recursive_tree_reduce(split..range.end, ops); - ops.push(TreeReduceOp::Reduce); - } - - #[test] - fn test_tree_reduce() { - const EXPECTED: &'static [&'static [TreeReduceOp]] = { - use TreeReduceOp::{Input as I, Reduce as R}; - &[ - &[], - &[I], - &[I, I, R], - &[I, I, R, I, R], - &[I, I, R, I, I, R, R], - &[I, I, R, I, I, R, R, I, R], - &[I, I, R, I, I, R, R, I, I, R, R], - &[I, I, R, I, I, R, R, I, I, R, I, R, R], - &[I, I, R, I, I, R, R, I, I, R, I, I, R, R, R], - ] - }; - for len in 0..64 { - let mut expected = vec![]; - recursive_tree_reduce(0..len, &mut expected); - if let Some(&expected2) = EXPECTED.get(len) { - assert_eq!(*expected, *expected2, "len={len}"); - } - assert_eq!( - TreeReduceOps::new(len).collect::>(), - expected, - "len={len}" - ); - let seq: Vec<_> = (0..len).collect(); - assert_eq!( - seq, - tree_reduce(seq.iter().map(|&v| vec![v]), |mut l, r| { - l.extend_from_slice(&r); - l - }) - .unwrap_or_default() - ); - } - } -}