From 0331681cae5da0111e01e03b760cc340c2bbbf27 Mon Sep 17 00:00:00 2001 From: Jacob Lifshay Date: Wed, 5 Mar 2025 23:50:38 -0800 Subject: [PATCH] WIP: splitting reg_alloc --- crates/cpu/src/config.rs | 17 +- crates/cpu/src/instruction.rs | 46 +---- crates/cpu/src/instruction_rename.rs | 262 +++++++++++++++++++++++++++ crates/cpu/src/lib.rs | 2 + crates/cpu/src/reg_alloc.rs | 171 +++-------------- crates/cpu/src/rename_table.rs | 187 +++++++++++++++++++ crates/cpu/src/util.rs | 14 +- crates/cpu/src/util/array_vec.rs | 63 ++++++- crates/cpu/src/util/tree_reduce.rs | 152 ---------------- 9 files changed, 567 insertions(+), 347 deletions(-) create mode 100644 crates/cpu/src/instruction_rename.rs create mode 100644 crates/cpu/src/rename_table.rs delete mode 100644 crates/cpu/src/util/tree_reduce.rs diff --git a/crates/cpu/src/config.rs b/crates/cpu/src/config.rs index 5be163c..f6dbaeb 100644 --- a/crates/cpu/src/config.rs +++ b/crates/cpu/src/config.rs @@ -2,6 +2,7 @@ // See Notices.txt for copyright information use crate::{ instruction::{MOpTrait, PRegNum, RenamedMOp, UnitNum, UnitOutRegNum, CONST_ZERO_UNIT_NUM}, + reg_alloc::RetireQueueIndex, unit::{ unit_base::{UnitForwardingInfo, UnitToRegAlloc}, UnitCancelInput, UnitKind, UnitOutputWrite, @@ -35,7 +36,6 @@ pub struct CpuConfig { pub fetch_width: NonZeroUsize, /// default value for [`UnitConfig::max_in_flight`] pub default_unit_max_in_flight: NonZeroUsize, - pub rob_size: NonZeroUsize, } impl CpuConfig { @@ -52,13 +52,12 @@ impl CpuConfig { }; v }; - pub fn new(units: Vec, rob_size: NonZeroUsize) -> Self { + pub fn new(units: Vec) -> Self { Self { units, out_reg_num_width: Self::DEFAULT_OUT_REG_NUM_WIDTH, fetch_width: Self::DEFAULT_FETCH_WIDTH, default_unit_max_in_flight: Self::DEFAULT_UNIT_MAX_IN_FLIGHT, - rob_size, } } pub fn non_const_unit_nums(&self) -> std::ops::Range { @@ -79,9 +78,6 @@ impl CpuConfig { pub fn p_reg_num_width(&self) -> usize { self.unit_num_width() + self.out_reg_num_width } - pub fn renamed_mop_in_unit(&self) -> RenamedMOp, DynSize> { - RenamedMOp[self.unit_out_reg_num()][self.p_reg_num_width()] - } pub fn unit_output_write(&self) -> UnitOutputWrite { UnitOutputWrite[self.out_reg_num_width] } @@ -116,4 +112,13 @@ impl CpuConfig { UnitToRegAlloc[mop_ty][extra_out_ty][self.unit_num_width()][self.out_reg_num_width] [self.non_const_unit_nums().len()] } + pub fn retire_queue_index_width(&self) -> usize { + let max_in_flight: usize = (0..self.units.len()) + .map(|unit_index| self.unit_max_in_flight(unit_index).get()) + .sum(); + 2 + max_in_flight.next_power_of_two().ilog2() as usize + } + pub fn retire_queue_index(&self) -> RetireQueueIndex { + RetireQueueIndex[self.retire_queue_index_width()] + } } diff --git a/crates/cpu/src/instruction.rs b/crates/cpu/src/instruction.rs index 80dd9d5..6bdea08 100644 --- a/crates/cpu/src/instruction.rs +++ b/crates/cpu/src/instruction.rs @@ -910,11 +910,9 @@ impl MOpRegNum { // // TODO: maybe add more registers later. 
pub const FLAG_REG_NUMS: Range = 0xFE..0x100; - /// registers handled by a special small rename table (for flags and stuff, since it has more read/write ports) - pub const SPECIAL_REG_NUMS: Range = Self::FLAG_REG_NUMS; - /// registers handled by the large rename table for normal registers (has less read/write ports) - pub const NORMAL_REG_NUMS: Range = - Self::CONST_ZERO_REG_NUM + 1..Self::SPECIAL_REG_NUMS.start; + /// registers that aren't constants + pub const NON_CONST_REG_NUMS: Range = + Self::CONST_ZERO_REG_NUM + 1..Self::FLAG_REG_NUMS.end; } #[hdl(cmp_eq)] @@ -929,29 +927,6 @@ pub struct MOpDestReg { pub flag_regs: Array, { range_u32_len(&MOpRegNum::FLAG_REG_NUMS) }>, } -#[derive(Copy, Clone, Eq, PartialEq, Ord, PartialOrd, Hash, Debug)] -pub enum RenameTableName { - /// the large rename table for normal registers (has less read/write ports) - Normal, - /// a special small rename table (for flags and stuff, since it has more read/write ports) - Special, -} - -impl RenameTableName { - pub const fn reg_range(self) -> std::ops::Range { - match self { - Self::Normal => MOpRegNum::NORMAL_REG_NUMS, - Self::Special => MOpRegNum::SPECIAL_REG_NUMS, - } - } - pub const fn as_str(self) -> &'static str { - match self { - Self::Normal => "rename_table_normal", - Self::Special => "rename_table_special", - } - } -} - #[derive(Copy, Clone, Eq, PartialEq, Hash, Debug)] pub enum MOpDestRegKind { NormalReg { @@ -989,16 +964,13 @@ impl fmt::Display for MOpDestRegName { } impl MOpDestRegKind { - pub const fn reg_range(self) -> std::ops::Range { + pub const fn reg_num_range(self) -> std::ops::Range { match self { - Self::NormalReg { .. } => MOpRegNum::NORMAL_REG_NUMS, - Self::FlagReg { .. } => MOpRegNum::FLAG_REG_NUMS, - } - } - pub const fn rename_table_names(self) -> &'static [RenameTableName] { - match self { - Self::NormalReg { .. } => &[RenameTableName::Normal, RenameTableName::Special], - Self::FlagReg { .. 
} => &[RenameTableName::Special], + Self::NormalReg { flag_reg_index: _ } => MOpRegNum::NON_CONST_REG_NUMS, + Self::FlagReg { + reg_num, + flag_reg_index: _, + } => reg_num..reg_num + 1, } } pub fn fixed_reg_num(self) -> Option { diff --git a/crates/cpu/src/instruction_rename.rs b/crates/cpu/src/instruction_rename.rs new file mode 100644 index 0000000..01bbba7 --- /dev/null +++ b/crates/cpu/src/instruction_rename.rs @@ -0,0 +1,262 @@ +// SPDX-License-Identifier: LGPL-3.0-or-later +// See Notices.txt for copyright information + +use crate::{ + config::CpuConfig, + instruction::{MOp, MOpDestReg, MOpRegNum, MOpTrait, PRegNum, RenamedMOp}, + reg_alloc::RetireQueueIndex, + rename_table::{rename_table, RenameTablePortConfig}, + unit::UnitMOp, + util::array_vec::{ArrayVec, Length, ReadyValidArray}, +}; +use fayalite::{ + prelude::*, + util::{prefix_sum::PrefixSumAlgorithm, ready_valid::ReadyValid}, +}; + +#[hdl] +pub struct RenamedInsnDest { + pub retire_queue_index: RetireQueueIndex, + pub p_reg_num: PRegNum, +} + +impl CpuConfig { + pub fn renamed_insn_dest(&self) -> RenamedInsnDest { + RenamedInsnDest[self.unit_num_width()][self.out_reg_num_width] + [self.retire_queue_index_width()] + } + pub fn renamed_mop_in_unit( + &self, + ) -> RenamedMOp, DynSize> { + RenamedMOp[self.renamed_insn_dest()][self.p_reg_num_width()] + } +} + +#[hdl] +pub struct InstructionRenameInputInsn { + pub mop: MOp, + pub renamed_dest: PRegNum, +} + +impl CpuConfig { + pub fn instruction_rename_input_insn(&self) -> InstructionRenameInputInsn { + InstructionRenameInputInsn[self.unit_num_width()][self.out_reg_num_width] + } +} + +#[hdl] +struct InsnsInPrefixSummary { + all_ready: Bool, + ready_count: Length, + retire_queue_used: Length, +} + +#[hdl_module] +pub fn instruction_rename(config: &CpuConfig) { + #[hdl] + let cd: ClockDomain = m.input(); + #[hdl] + let insns_in: ReadyValidArray, DynSize> = + m.input(ReadyValidArray[config.instruction_rename_input_insn()][config.fetch_width.get()]); + #[hdl] + let start_retire_queue_index: RetireQueueIndex = m.input(config.retire_queue_index()); + #[hdl] + let end_retire_queue_index: RetireQueueIndex = m.output(config.retire_queue_index()); + #[hdl] + let insns_out: Array< + ReadyValid, DynSize>>, + > = m.output( + Array[ReadyValid[RenamedMOp[config.renamed_insn_dest()][config.p_reg_num_width()]]] + [config.fetch_width.get()], + ); + + // TODO: handle resetting table after cancelling instructions + + #[hdl] + let insns_ready_or_move = wire(Array[Bool][config.fetch_width.get()]); + + for (insn_ready_or_move, insn_out) in insns_ready_or_move.into_iter().zip(insns_out) { + connect(insn_ready_or_move, insn_out.ready); + } + + ArrayVec::for_each(insns_in.data, |fetch_index, input_insn| { + #[hdl] + match input_insn.mop { + UnitMOp::<_, _, _>::TransformedMove(_) => { + connect(insns_ready_or_move[fetch_index], true); + } + UnitMOp::<_, _, _>::AluBranch(_) | UnitMOp::<_, _, _>::LoadStore(_) => {} + } + }); + + let insns_in_prefix_summary_ty = InsnsInPrefixSummary[config.fetch_width.get()]; + #[hdl] + let insns_in_prefix_summaries = + wire(Array[insns_in_prefix_summary_ty][config.fetch_width.get()]); + let insns_in_prefix_summaries_vec = PrefixSumAlgorithm::WorkEfficient.run( + (0..config.fetch_width.get()).map(|fetch_index| { + #[hdl] + let insns_in_prefix_summary_in = wire(insns_in_prefix_summary_ty); + #[hdl] + let InsnsInPrefixSummary::<_> { + all_ready, + ready_count, + retire_queue_used, + } = insns_in_prefix_summary_in; + connect(all_ready, insns_out[fetch_index].ready); + connect( 
+ ready_count, + Expr::ty(ready_count).cast_from_uint_unchecked(all_ready.cast_to(UInt[1])), + ); + connect(retire_queue_used, Expr::ty(retire_queue_used).zero()); + #[hdl] + if let HdlSome(input_insn) = ArrayVec::get(insns_in.data, fetch_index) { + connect(retire_queue_used, ready_count); + #[hdl] + match input_insn.mop { + UnitMOp::<_, _, _>::TransformedMove(_) => { + connect(all_ready, true); + connect(retire_queue_used, Expr::ty(retire_queue_used).zero()); + } + UnitMOp::<_, _, _>::AluBranch(_) | UnitMOp::<_, _, _>::LoadStore(_) => {} + } + } + insns_in_prefix_summary_in + }), + |l, r| { + #[hdl] + let insns_in_prefix_summary_merge = wire(insns_in_prefix_summary_ty); + #[hdl] + let InsnsInPrefixSummary::<_> { + all_ready, + ready_count, + retire_queue_used, + } = insns_in_prefix_summary_merge; + connect(all_ready, l.all_ready & r.all_ready); + #[hdl] + if l.all_ready { + connect( + ready_count, + Expr::ty(ready_count).cast_from_uint_unchecked( + Length::as_uint(l.ready_count) + Length::as_uint(r.ready_count), + ), + ); + connect( + retire_queue_used, + Expr::ty(retire_queue_used).cast_from_uint_unchecked( + Length::as_uint(l.retire_queue_used) + Length::as_uint(r.retire_queue_used), + ), + ); + } else { + connect(ready_count, l.ready_count); + connect(retire_queue_used, l.retire_queue_used); + } + insns_in_prefix_summary_merge + }, + ); + for (l, r) in insns_in_prefix_summaries + .into_iter() + .zip(insns_in_prefix_summaries_vec) + { + connect(l, r); + } + connect( + insns_in.ready, + insns_in_prefix_summaries[config.fetch_width.get() - 1].ready_count, + ); + + #[hdl] + let retire_queue_indexes = + wire(Array[config.retire_queue_index()][config.fetch_width.get() + 1]); + connect(retire_queue_indexes[0], start_retire_queue_index); + connect( + end_retire_queue_index, + retire_queue_indexes[config.fetch_width.get()], + ); + for (retire_queue_index, insns_in_prefix_summary) in retire_queue_indexes + .into_iter() + .skip(1) + .zip(insns_in_prefix_summaries) + { + connect_any( + retire_queue_index.index, + start_retire_queue_index.index + + Length::as_uint(insns_in_prefix_summary.retire_queue_used), + ); + } + + let mut port_configs = Vec::new(); + let mut src_reg_count = 0; + MOpTrait::for_each_src_reg(MOp.uninit(), &mut |_, src_index| { + src_reg_count = src_reg_count.max(src_index + 1); + }); + for _ in 0..config.fetch_width.get() { + for _ in 0..src_reg_count { + port_configs.push(RenameTablePortConfig::Read { + addr_range: MOpRegNum::NON_CONST_REG_NUMS, + }); + } + for dest_reg_kind in MOpDestReg::REG_KINDS { + port_configs.push(RenameTablePortConfig::Write { + addr_range: dest_reg_kind.reg_num_range(), + }); + } + } + + #[hdl] + let rename_table = instance(rename_table(config, &port_configs)); + + connect(rename_table.cd, cd); + + for read_port in rename_table.read_ports { + connect_any(read_port.addr, 0_hdl_u0); + } + for write_port in rename_table.write_ports { + connect_any(write_port.addr, 0_hdl_u0); + connect_any(write_port.data, config.p_reg_num().const_zero()); + } + + let mut read_port_index = 0; + let mut write_port_index = 0; + ArrayVec::for_each( + ReadyValidArray::firing_data(insns_in), + |fetch_index, input_insn| { + #[hdl] + let InstructionRenameInputInsn::<_, _> { mop, renamed_dest } = input_insn; + let new_dest = #[hdl] + RenamedInsnDest::<_, _, _> { + retire_queue_index: retire_queue_indexes[fetch_index], + p_reg_num: renamed_dest, + }; + let insn_out = MOpTrait::map_regs( + mop, + new_dest, + config.p_reg_num_width(), + &mut |src_reg, src_index| { + connect( + 
rename_table.read_ports[read_port_index + src_index].addr, + src_reg.cast_bits_to(MOpRegNum), + ); + rename_table.read_ports[read_port_index + src_index] + .data + .cast_to_bits() + }, + ); + let insn_out = UnitMOp::try_with_transformed_move_op( + insn_out, + config.renamed_mop_in_unit().TransformedMove, + |v: Expr>, _| connect(v, Expr::ty(v).HdlNone()), + ); + connect(insns_out[fetch_index].data, insn_out); + read_port_index += src_reg_count; + for dest_reg in MOpDestReg::regs(MOpTrait::dest_reg(mop)) { + connect(rename_table.write_ports[write_port_index].addr, dest_reg); + connect( + rename_table.write_ports[write_port_index].data, + renamed_dest, + ); + write_port_index += 1; + } + }, + ); +} diff --git a/crates/cpu/src/lib.rs b/crates/cpu/src/lib.rs index bae3720..766811d 100644 --- a/crates/cpu/src/lib.rs +++ b/crates/cpu/src/lib.rs @@ -2,7 +2,9 @@ // See Notices.txt for copyright information pub mod config; pub mod instruction; +pub mod instruction_rename; pub mod reg_alloc; pub mod register; +pub mod rename_table; pub mod unit; pub mod util; diff --git a/crates/cpu/src/reg_alloc.rs b/crates/cpu/src/reg_alloc.rs index 6e7bc5d..b79eace 100644 --- a/crates/cpu/src/reg_alloc.rs +++ b/crates/cpu/src/reg_alloc.rs @@ -7,13 +7,13 @@ use crate::{ COMMON_MOP_SRC_LEN, }, unit::{ - unit_base::{UnitForwardingInfo, UnitInput}, - GlobalState, TrapData, UnitMOp, UnitOutput, UnitOutputWrite, UnitResult, - UnitResultCompleted, UnitTrait, + unit_base::UnitInput, GlobalState, TrapData, UnitMOp, UnitOutput, UnitOutputWrite, + UnitResult, UnitResultCompleted, UnitTrait, }, - util::tree_reduce::tree_reduce_with_state, + util::array_vec::ReadyValidArray, }; use fayalite::{ + int::BoolOrIntType, memory::{splat_mask, WriteStruct}, module::{instance_with_loc, memory_with_loc, wire_with_loc}, prelude::*, @@ -44,147 +44,30 @@ pub enum FetchDecodeSpecialOp { #[hdl] pub struct FetchDecodeInterface { - pub decoded_insns: ArrayType, FetchWidth>, + pub decoded_insns: ReadyValidArray, #[hdl(flip)] pub fetch_decode_special_op: ReadyValid, } -#[hdl] -struct ROBRenamedInsn { - mop_dest: MOpDestReg, - p_dest: PRegNum, +/// index into the retire queue (the virtual queue of instructions that haven't yet retired) +#[hdl(cmp_eq)] +pub struct RetireQueueIndex { + /// increases by one for each instruction added to the retire queue. + /// + /// this wraps around, so you must not compare it using `cmp_lt`/`cmp_gt` + /// but instead must use [`Self::insns_until`] and compare the output with zero. 
+ pub index: UIntType, } -#[hdl] -struct ROBEntry { - renamed_insn: ROBRenamedInsn, - dest_written: Bool, -} - -#[hdl_module] -fn rob(config: &CpuConfig) { - #[hdl] - let cd: ClockDomain = m.input(); - #[hdl] - let renamed_insns_in: Array>> = m.input( - Array[ReadyValid[ROBRenamedInsn[config.unit_num_width()][config.out_reg_num_width]]] - [config.fetch_width.get()], - ); - #[hdl] - let unit_forwarding_info: UnitForwardingInfo = - m.input(config.unit_forwarding_info()); - - let rob_entry_ty = ROBEntry[config.unit_num_width()][config.out_reg_num_width]; - #[hdl] - let rob = reg_builder() - .clock_domain(cd) - .no_reset(Array[rob_entry_ty][config.rob_size.get()]); - #[hdl] - let rob_valid_start = reg_builder() - .clock_domain(cd) - .reset(UInt::range(0..config.rob_size.get()).zero()); - #[hdl] - let rob_valid_end = reg_builder() - .clock_domain(cd) - .reset(UInt::range(0..config.rob_size.get()).zero()); - #[hdl] - let free_space = wire(UInt::range_inclusive(0..=config.rob_size.get())); - #[hdl] - if rob_valid_end.cmp_lt(rob_valid_start) { - // rob_valid_end wrapped around but start didn't - connect_any( - free_space, - rob_valid_end + config.rob_size.get() - rob_valid_start, - ); - } else { - connect_any(free_space, rob_valid_end - rob_valid_start); - } - - struct IndexAndRange { - index: Expr, - range: std::ops::Range, - } - - let mut next_write_index = IndexAndRange { - index: rob_valid_end, - range: 0..config.rob_size.get(), - }; - for fetch_index in 0..config.fetch_width.get() { - let write_index = next_write_index; - let next_write_index_range = write_index.range.start..write_index.range.end + 1; - next_write_index = IndexAndRange { - index: wire_with_loc( - &format!("next_write_index_{fetch_index}"), - SourceLocation::caller(), - UInt::range(next_write_index_range.clone()), - ), - range: next_write_index_range, - }; - connect( - renamed_insns_in[fetch_index].ready, - fetch_index.cmp_lt(free_space), - ); - #[hdl] - if let HdlSome(renamed_insn) = ReadyValid::firing_data(renamed_insns_in[fetch_index]) { - for i in write_index.range.clone() { - #[hdl] - if write_index.index.cmp_eq(i) { - connect( - rob[i % config.rob_size.get()], - #[hdl] - ROBEntry { - renamed_insn, - dest_written: false, - }, - ); - } - } - } - // TODO: optimize write_index chain better - connect_any( - next_write_index.index, - write_index.index - + ReadyValid::firing(renamed_insns_in[fetch_index]).cast_to_static::>(), - ); - } - assert!( - config.rob_size >= config.fetch_width, - "rob_size ({}) is too small for fetch_width = {} -- next_write_index would overflow", - config.rob_size, - config.fetch_width, - ); - #[hdl] - if next_write_index.index.cmp_lt(config.rob_size.get()) { - connect_any(rob_valid_end, next_write_index.index); - } else { - connect_any( - rob_valid_end, - next_write_index.index - config.rob_size.get(), - ); - } - - // TODO: optimize better, O(rob_size * unit_count) is too big here - for rob_index in 0..config.rob_size.get() { - for unit_index in 0..config.non_const_unit_nums().len() { - #[hdl] - if let HdlSome(unit_output_write) = unit_forwarding_info.unit_output_writes[unit_index] - { - #[hdl] - let UnitOutputWrite::<_> { - which: unit_out_reg, - value: _, - } = unit_output_write; - let p_reg_num = #[hdl] - PRegNum::<_, _> { - unit_num: config.unit_num().from_index(unit_index), - unit_out_reg, - }; - #[hdl] - if rob[rob_index].renamed_insn.p_dest.cmp_eq(p_reg_num) { - connect(rob[rob_index].dest_written, true); - } - } - } +impl RetireQueueIndex { + pub fn insns_until( + this: impl ToExpr, + target: 
impl ToExpr, + ) -> Expr> { + let this = this.to_expr(); + let target = target.to_expr(); + assert_eq!(Expr::ty(this), Expr::ty(target)); + (this.index - target.index).cast_to(Expr::ty(this).index.as_same_width_sint()) } } @@ -205,10 +88,6 @@ pub fn reg_alloc(config: &CpuConfig) { ); // TODO: finish - #[hdl] - let rob = instance(rob(config)); - connect(rob.cd, cd); - let mut rename_table_mems = BTreeMap::>::new(); for reg_kind in MOpDestReg::REG_KINDS { @@ -238,11 +117,6 @@ pub fn reg_alloc(config: &CpuConfig) { #[hdl] let renamed_mops_out_reg = wire(Array[HdlOption[config.p_reg_num()]][config.fetch_width.get()]); for fetch_index in 0..config.fetch_width.get() { - // TODO: finish - connect( - rob.renamed_insns_in[fetch_index].data, - Expr::ty(rob).renamed_insns_in.element().data.HdlNone(), - ); // TODO: finish connect( fetch_decode_interface.decoded_insns[fetch_index].ready, @@ -483,7 +357,6 @@ pub fn reg_alloc(config: &CpuConfig) { ); #[hdl] let unit_forwarding_info = wire(config.unit_forwarding_info()); - connect(rob.unit_forwarding_info, unit_forwarding_info); for (unit_index, unit_config) in config.units.iter().enumerate() { let dyn_unit = unit_config.kind.unit(config, unit_index); let unit = instance_with_loc( diff --git a/crates/cpu/src/rename_table.rs b/crates/cpu/src/rename_table.rs new file mode 100644 index 0000000..b88f696 --- /dev/null +++ b/crates/cpu/src/rename_table.rs @@ -0,0 +1,187 @@ +// SPDX-License-Identifier: LGPL-3.0-or-later +// See Notices.txt for copyright information + +use crate::{ + config::CpuConfig, + instruction::{MOpRegNum, PRegNum}, + util::range_intersection, +}; +use fayalite::{ + memory::{splat_mask, ReadStruct, WriteStruct}, + module::memory_with_loc, + prelude::*, +}; +use std::{mem, ops::Range}; + +#[hdl] +pub struct RenameTableReadPort { + pub addr: MOpRegNum, + #[hdl(flip)] + pub data: PRegNum, +} + +#[hdl] +pub struct RenameTableWritePort { + pub addr: MOpRegNum, + pub data: PRegNum, +} + +#[derive(Clone, Debug)] +pub enum RenameTablePortConfig { + Read { addr_range: Range }, + Write { addr_range: Range }, +} + +/// register rename table. +/// all read/write operations are done in the order of `port_configs`. +/// So if `port_configs[0]` is a write and `port_configs[1]` is a read, +/// then the read port will combinatorially return data written by the +/// write port in the *same* clock cycle. However, if `port_configs[0]` +/// is a read and `port_configs[1]` is a write, then the read port will +/// not see the data written by the write port until the *next* clock cycle. +#[hdl_module] +pub fn rename_table(config: &CpuConfig, port_configs: &[RenameTablePortConfig]) { + let read_count = port_configs + .iter() + .filter(|v| matches!(v, RenameTablePortConfig::Read { .. })) + .count(); + let write_count = port_configs + .iter() + .filter(|v| matches!(v, RenameTablePortConfig::Write { .. })) + .count(); + + #[hdl] + let cd: ClockDomain = m.input(); + #[hdl] + let read_ports: Array> = m.input( + Array[RenameTableReadPort[config.unit_num_width()][config.out_reg_num_width]][read_count], + ); + #[hdl] + let write_ports: Array> = m.input( + Array[RenameTableWritePort[config.unit_num_width()][config.out_reg_num_width]][write_count], + ); + + for read_port in read_ports { + connect(read_port.data, config.p_reg_num().const_zero()); + } + + let port_configs_and_indexes = port_configs.iter().scan( + (0usize, 0), + |(read_port_index, write_port_index), port_config| { + Some(( + port_config, + match port_config { + RenameTablePortConfig::Read { .. 
} => { + mem::replace(read_port_index, *read_port_index + 1) + } + RenameTablePortConfig::Write { .. } => { + mem::replace(write_port_index, *write_port_index + 1) + } + }, + )) + }, + ); + + let mut range_transitions = Vec::with_capacity(port_configs.len() * 2); + for port_config in port_configs { + let (RenameTablePortConfig::Read { addr_range } + | RenameTablePortConfig::Write { addr_range }) = port_config; + range_transitions.push(addr_range.start); + range_transitions.push(addr_range.end); + } + range_transitions.sort_unstable(); + range_transitions.dedup(); + let mut last_range_transition = None; + for range_transition in range_transitions { + let Some(last_range_transition) = last_range_transition.replace(range_transition) else { + continue; + }; + let cur_addr_range = last_range_transition..range_transition; + let mut mem = memory_with_loc( + &if cur_addr_range.len() == 1 { + format!("mem_{:#x}", cur_addr_range.start) + } else { + format!("mem_{:#x}_{:#x}", cur_addr_range.start, cur_addr_range.end) + }, + config.p_reg_num(), + SourceLocation::caller(), + ); + mem.depth(cur_addr_range.len()); + let addr_in_range = |addr: Expr| { + if cur_addr_range.len() == 1 { + addr.value.cmp_eq(cur_addr_range.start) + } else { + addr.value.cmp_ge(cur_addr_range.start) & addr.value.cmp_lt(cur_addr_range.end) + } + }; + for (port_config, port_index) in port_configs_and_indexes.clone() { + match port_config { + RenameTablePortConfig::Read { addr_range } => { + if range_intersection(&addr_range, &cur_addr_range).is_none() { + continue; + } + let port = read_ports[port_index]; + #[hdl] + let ReadStruct::<_, _> { + addr, + en, + clk, + data, + } = mem.new_read_port(); + connect_any(addr, port.addr.value - cur_addr_range.start); + connect(en, addr_in_range(port.addr)); + connect(clk, cd.clk); + #[hdl] + if en { + connect(port.data, data); + } + } + RenameTablePortConfig::Write { addr_range } => { + if range_intersection(&addr_range, &cur_addr_range).is_none() { + continue; + } + let port = write_ports[port_index]; + #[hdl] + let WriteStruct::<_, _> { + addr, + en, + clk, + data, + mask, + } = mem.new_write_port(); + connect_any(addr, port.addr.value - cur_addr_range.start); + connect(en, addr_in_range(port.addr)); + connect(clk, cd.clk); + connect(data, port.data); + connect(mask, splat_mask(Expr::ty(port).data, true.to_expr())); + } + } + } + } + for (port_config_index, (port_config, port_index)) in + port_configs_and_indexes.clone().enumerate() + { + let RenameTablePortConfig::Read { addr_range } = port_config else { + continue; + }; + let port = read_ports[port_index]; + for (prev_port_config, prev_port_index) in + port_configs_and_indexes.clone().take(port_config_index) + { + let RenameTablePortConfig::Write { + addr_range: prev_addr_range, + } = prev_port_config + else { + continue; + }; + if range_intersection(addr_range, prev_addr_range).is_none() { + continue; + } + let prev_port = write_ports[prev_port_index]; + #[hdl] + if prev_port.addr.cmp_eq(port.addr) { + connect(port.data, prev_port.data); + } + } + } +} diff --git a/crates/cpu/src/util.rs b/crates/cpu/src/util.rs index 0b53274..f57003a 100644 --- a/crates/cpu/src/util.rs +++ b/crates/cpu/src/util.rs @@ -2,7 +2,6 @@ // See Notices.txt for copyright information pub mod array_vec; -pub mod tree_reduce; pub(crate) const fn range_u32_len(range: &std::ops::Range) -> usize { let retval = range.end.saturating_sub(range.start); @@ -25,3 +24,16 @@ pub(crate) const fn range_u32_nth_or_panic(range: &std::ops::Range, index: panic!("index out of 
range") } } + +pub(crate) const fn range_intersection( + a: &std::ops::Range, + b: &std::ops::Range, +) -> Option> { + let start = if a.start > b.start { a.start } else { b.start }; + let end = if a.end < b.end { a.end } else { b.end }; + if start < end { + Some(start..end) + } else { + None + } +} diff --git a/crates/cpu/src/util/array_vec.rs b/crates/cpu/src/util/array_vec.rs index 761f53f..69084c6 100644 --- a/crates/cpu/src/util/array_vec.rs +++ b/crates/cpu/src/util/array_vec.rs @@ -2,11 +2,15 @@ // See Notices.txt for copyright information use fayalite::{ - expr::ops::{ExprCastTo, ExprIndex, ExprPartialEq, ExprPartialOrd}, - int::SizeType, + expr::{ + ops::{ExprCastTo, ExprIndex, ExprPartialEq, ExprPartialOrd}, + ToLiteralBits, + }, + int::{IntType, SizeType}, intern::{Intern, Interned}, prelude::*, ty::{MatchVariantWithoutScope, StaticType, TypeProperties}, + util::ConstBool, }; use std::{marker::PhantomData, ops::Index}; @@ -249,6 +253,29 @@ impl ArrayVec { }); array_vec_as_array_of_options } + #[hdl] + pub fn get>( + this: impl ToExpr, + index: impl ToExpr, + ) -> Expr> { + let this = this.to_expr(); + let index = Expr::as_dyn_int(index.to_expr()); + let never_in_bounds = index.cmp_ge(Expr::ty(this).capacity()); + if let Ok(never_in_bounds) = never_in_bounds.to_literal_bits() { + if never_in_bounds[0] { + // avoid error from out-of-bounds constant index + return HdlOption[Expr::ty(this).element()].HdlNone(); + } + } + #[hdl] + let array_vec_get = wire(HdlOption[Expr::ty(this).element()]); + connect(array_vec_get, Expr::ty(array_vec_get).HdlNone()); + #[hdl] + if index.cmp_lt(Length::as_uint(Self::len(this))) { + connect(array_vec_get, HdlSome(this.elements[index])); + } + array_vec_get + } } impl ExprIndex for ArrayVec @@ -263,3 +290,35 @@ where as ExprIndex>::expr_index(&this.elements, index) } } + +#[hdl] +pub struct ReadyValidArray { + pub data: ArrayVec, + #[hdl(flip)] + pub ready: Length, +} + +impl ReadyValidArray { + #[hdl] + pub fn firing_len(this: impl ToExpr) -> Expr> { + let this = this.to_expr(); + assert_eq!(Expr::ty(this).data.len_ty(), Expr::ty(this).ready); + #[hdl] + let firing_len = wire(Expr::ty(this).data.len); + connect(firing_len, this.data.len); + #[hdl] + if this.data.len.cmp_gt(this.ready) { + connect(firing_len, this.ready); + } + firing_len + } + #[hdl] + pub fn firing_data(this: impl ToExpr) -> Expr> { + let this = this.to_expr(); + #[hdl] + let firing_data = wire(Expr::ty(this).data); + connect(firing_data, this.data); + connect(firing_data.len, Self::firing_len(this)); + firing_data + } +} diff --git a/crates/cpu/src/util/tree_reduce.rs b/crates/cpu/src/util/tree_reduce.rs deleted file mode 100644 index c8d12f7..0000000 --- a/crates/cpu/src/util/tree_reduce.rs +++ /dev/null @@ -1,152 +0,0 @@ -// SPDX-License-Identifier: LGPL-3.0-or-later -// See Notices.txt for copyright information -#[derive(Copy, Clone, PartialEq, Eq, Hash, Debug)] -pub enum TreeReduceOp { - Input, - Reduce, -} - -#[derive(Copy, Clone, Debug)] -struct Entry { - start: usize, - depth: u32, -} - -#[derive(Clone, Debug)] -pub struct TreeReduceOps { - len: usize, - stack: Vec, -} - -impl TreeReduceOps { - pub fn new(len: usize) -> Self { - TreeReduceOps { - len, - stack: Vec::new(), - } - } -} - -impl Iterator for TreeReduceOps { - type Item = TreeReduceOp; - fn next(&mut self) -> Option { - match *self.stack { - [] if self.len != 0 => { - self.stack.push(Entry { start: 0, depth: 0 }); - Some(TreeReduceOp::Input) - } - [.., ref mut second_last, last] if second_last.depth == last.depth 
=> { - second_last.depth += 1; - self.stack.pop(); - Some(TreeReduceOp::Reduce) - } - [.., last] if self.len - last.start > 1 << last.depth => { - let start = last.start + (1 << last.depth); - self.stack.push(Entry { start, depth: 0 }); - Some(TreeReduceOp::Input) - } - [.., ref mut second_last, _] => { - second_last.depth += 1; - self.stack.pop(); - Some(TreeReduceOp::Reduce) - } - _ => None, - } - } -} - -#[track_caller] -pub fn tree_reduce_with_state( - iter: impl IntoIterator, - state: &mut S, - mut input: impl FnMut(&mut S, I) -> R, - mut reduce: impl FnMut(&mut S, R, R) -> R, -) -> Option { - let mut stack = Vec::new(); - let mut iter = iter.into_iter(); - for op in TreeReduceOps::new(iter.len()) { - match op { - TreeReduceOp::Input => stack.push(input( - state, - iter.next().expect("inconsistent iterator len() and next()"), - )), - TreeReduceOp::Reduce => { - let Some(r) = stack.pop() else { - unreachable!(); - }; - let Some(l) = stack.pop() else { - unreachable!(); - }; - stack.push(reduce(state, l, r)); - } - } - } - stack.pop() -} - -pub fn tree_reduce( - iter: impl IntoIterator, - mut reduce: impl FnMut(T, T) -> T, -) -> Option { - tree_reduce_with_state(iter, &mut (), |_, v| v, move |_, l, r| reduce(l, r)) -} - -#[cfg(test)] -mod tests { - use super::*; - use std::ops::Range; - - fn recursive_tree_reduce(range: Range, ops: &mut Vec) { - if range.len() == 1 { - ops.push(TreeReduceOp::Input); - return; - } - if range.is_empty() { - return; - } - let pow2_len = range.len().next_power_of_two(); - let split = range.start + pow2_len / 2; - recursive_tree_reduce(range.start..split, ops); - recursive_tree_reduce(split..range.end, ops); - ops.push(TreeReduceOp::Reduce); - } - - #[test] - fn test_tree_reduce() { - const EXPECTED: &'static [&'static [TreeReduceOp]] = { - use TreeReduceOp::{Input as I, Reduce as R}; - &[ - &[], - &[I], - &[I, I, R], - &[I, I, R, I, R], - &[I, I, R, I, I, R, R], - &[I, I, R, I, I, R, R, I, R], - &[I, I, R, I, I, R, R, I, I, R, R], - &[I, I, R, I, I, R, R, I, I, R, I, R, R], - &[I, I, R, I, I, R, R, I, I, R, I, I, R, R, R], - ] - }; - for len in 0..64 { - let mut expected = vec![]; - recursive_tree_reduce(0..len, &mut expected); - if let Some(&expected2) = EXPECTED.get(len) { - assert_eq!(*expected, *expected2, "len={len}"); - } - assert_eq!( - TreeReduceOps::new(len).collect::>(), - expected, - "len={len}" - ); - let seq: Vec<_> = (0..len).collect(); - assert_eq!( - seq, - tree_reduce(seq.iter().map(|&v| vec![v]), |mut l, r| { - l.extend_from_slice(&r); - l - }) - .unwrap_or_default() - ); - } - } -}
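
For reference, `CpuConfig::retire_queue_index_width()` (added in config.rs above) sizes the retire-queue index from the total number of instructions the units can have in flight. The following is a standalone plain-Rust sketch of that arithmetic, not part of the patch; the per-unit counts in `main` are made-up example values.

// Standalone sketch of the arithmetic in CpuConfig::retire_queue_index_width():
// sum max_in_flight over all units, round up to a power of two, and add two
// bits of headroom on top of the log2.
fn retire_queue_index_width(max_in_flight_per_unit: &[usize]) -> usize {
    let max_in_flight: usize = max_in_flight_per_unit.iter().sum();
    2 + max_in_flight.next_power_of_two().ilog2() as usize
}

fn main() {
    // hypothetical example: three units, each allowing 8 ops in flight:
    // sum = 24, next_power_of_two = 32, ilog2 = 5, so the index is 7 bits wide.
    assert_eq!(retire_queue_index_width(&[8, 8, 8]), 7);
}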
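The per-lane summaries in `instruction_rename` are combined with a work-efficient prefix sum; the combine step only lets counts accumulate past a lane while every lane to its left is ready, so the result describes the longest acceptable leading group of the fetch packet. Below is a plain-Rust sketch of that combine operator plus a serial-scan equivalent (the hardware runs the scan in parallel); `Summary` and the lane values in `main` are simplifications of `InsnsInPrefixSummary`, not the real per-lane wiring.

// Sketch of the merge function passed to PrefixSumAlgorithm::WorkEfficient.
#[derive(Clone, Copy)]
struct Summary {
    all_ready: bool,
    ready_count: u32,
    retire_queue_used: u32,
}

fn merge(l: Summary, r: Summary) -> Summary {
    if l.all_ready {
        Summary {
            all_ready: l.all_ready && r.all_ready,
            ready_count: l.ready_count + r.ready_count,
            retire_queue_used: l.retire_queue_used + r.retire_queue_used,
        }
    } else {
        // a stalled lane blocks everything after it this cycle
        Summary { all_ready: false, ..l }
    }
}

// software equivalent of the inclusive prefix scan over the fetch group
fn prefix_scan(lanes: &[Summary]) -> Vec<Summary> {
    lanes
        .iter()
        .scan(None, |acc: &mut Option<Summary>, &s| {
            let next = match *acc {
                Some(prev) => merge(prev, s),
                None => s,
            };
            *acc = Some(next);
            Some(next)
        })
        .collect()
}

fn main() {
    // hypothetical 4-wide fetch group where lane 2 is not ready:
    let lanes = [
        Summary { all_ready: true, ready_count: 1, retire_queue_used: 1 },
        Summary { all_ready: true, ready_count: 1, retire_queue_used: 0 }, // a move
        Summary { all_ready: false, ready_count: 0, retire_queue_used: 0 },
        Summary { all_ready: true, ready_count: 1, retire_queue_used: 1 },
    ];
    let totals = prefix_scan(&lanes);
    // only the first two lanes are accepted; one retire-queue slot is used
    assert_eq!(totals[3].ready_count, 2);
    assert_eq!(totals[3].retire_queue_used, 1);
}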
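`RetireQueueIndex` deliberately has no ordered comparison: `insns_until` subtracts the wrapping indexes and reinterprets the difference as signed, so callers compare the result against zero instead of using `cmp_lt`/`cmp_gt`. A plain-Rust sketch of the same trick follows; the 8-bit width is an arbitrary choice for the example, not the real index width.

// Wrap-safe ordering: subtract with wraparound, then treat the result as signed.
fn insns_until(this: u8, target: u8) -> i8 {
    this.wrapping_sub(target) as i8
}

fn main() {
    // `this` is 3 behind `target` in wrap order even though 0xFE > 0x01
    // numerically, so the signed difference is negative.
    assert!(insns_until(0xFE, 0x01) < 0);
    // once `this` passes `target`, the signed difference is non-negative.
    assert!(insns_until(0x02, 0x01) >= 0);
}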
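The ordering rule in the `rename_table` doc comment (a write earlier in `port_configs` is forwarded combinationally to a later read of the same address, while a write that comes later is only visible the next cycle) can be modelled as an ordered walk over the ports. The behavioural sketch below is plain Rust, not fayalite, and `PortOp`/`simulate_cycle` are made up for illustration.

// One simulated clock cycle of a table with ordered read/write ports.
enum PortOp {
    Read { addr: usize, out: usize },  // `out` indexes into `read_results`
    Write { addr: usize, data: u32 },
}

fn simulate_cycle(table: &mut [u32], ports: &[PortOp], read_results: &mut [u32]) {
    let mut pending_writes: Vec<(usize, u32)> = Vec::new();
    for port in ports {
        match *port {
            PortOp::Read { addr, out } => {
                // start from the registered state...
                let mut data = table[addr];
                // ...then let any write earlier in port order bypass it.
                for &(waddr, wdata) in &pending_writes {
                    if waddr == addr {
                        data = wdata;
                    }
                }
                read_results[out] = data;
            }
            PortOp::Write { addr, data } => pending_writes.push((addr, data)),
        }
    }
    // writes become visible to everyone at the end of the cycle
    for (addr, data) in pending_writes {
        table[addr] = data;
    }
}

fn main() {
    let mut table = vec![0u32; 4];
    let mut reads = vec![0u32; 2];
    let ports = [
        PortOp::Read { addr: 1, out: 0 },   // read before the write: old value
        PortOp::Write { addr: 1, data: 7 },
        PortOp::Read { addr: 1, out: 1 },   // read after the write: bypassed value
    ];
    simulate_cycle(&mut table, &ports, &mut reads);
    assert_eq!(reads, [0, 7]);
    assert_eq!(table[1], 7); // visible to every port from the next cycle on
}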
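`rename_table` builds one memory per span between consecutive `addr_range` boundaries ("range transitions"), then only wires a port to the memories its range actually overlaps, using `range_intersection`. Splitting this way keeps each memory's port count down to the ports that can address it. A standalone sketch of that bank selection; the register ranges in `main` are example values, not the CPU's real ones.

// Sketch of the range-transition bank split used by rename_table.
use std::ops::Range;

fn range_intersection(a: &Range<u32>, b: &Range<u32>) -> Option<Range<u32>> {
    let start = a.start.max(b.start);
    let end = a.end.min(b.end);
    if start < end { Some(start..end) } else { None }
}

fn memory_banks(port_ranges: &[Range<u32>]) -> Vec<Range<u32>> {
    // every range endpoint is a transition; each adjacent pair is one bank
    let mut transitions: Vec<u32> = port_ranges.iter().flat_map(|r| [r.start, r.end]).collect();
    transitions.sort_unstable();
    transitions.dedup();
    transitions.windows(2).map(|w| w[0]..w[1]).collect()
}

fn main() {
    // a wide normal-register port plus a single-register flag port
    let ports = [1..0xFE_u32, 0xFE..0xFF];
    let banks = memory_banks(&ports);
    assert_eq!(banks, vec![1..0xFE, 0xFE..0xFF]);
    // the flag port only needs to be wired to the second bank
    assert!(range_intersection(&ports[1], &banks[0]).is_none());
    assert!(range_intersection(&ports[1], &banks[1]).is_some());
}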
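`ReadyValidArray` (added in util/array_vec.rs) generalises the usual ready/valid handshake to a variable-length group: the producer offers up to `data.len` elements, the consumer advertises how many it can take, and the number that actually transfers in a cycle is the smaller of the two. A minimal sketch of that transfer rule:

// firing_len = min(elements offered, elements the consumer can accept)
fn firing_len(data_len: usize, ready: usize) -> usize {
    data_len.min(ready)
}

fn main() {
    assert_eq!(firing_len(3, 4), 3); // producer-limited
    assert_eq!(firing_len(3, 2), 2); // consumer-limited
}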