WIP: splitting reg_alloc
Some checks failed
/ deps (push) Successful in 14s
/ test (push) Failing after 1m55s

This commit is contained in:
Jacob Lifshay 2025-03-05 23:50:38 -08:00
parent 60341e22af
commit 0331681cae
Signed by: programmerjake
SSH key fingerprint: SHA256:HnFTLGpSm4Q4Fj502oCFisjZSoakwEuTsJJMSke63RQ
9 changed files with 567 additions and 347 deletions

View file

@ -2,6 +2,7 @@
// See Notices.txt for copyright information
use crate::{
instruction::{MOpTrait, PRegNum, RenamedMOp, UnitNum, UnitOutRegNum, CONST_ZERO_UNIT_NUM},
reg_alloc::RetireQueueIndex,
unit::{
unit_base::{UnitForwardingInfo, UnitToRegAlloc},
UnitCancelInput, UnitKind, UnitOutputWrite,
@ -35,7 +36,6 @@ pub struct CpuConfig {
pub fetch_width: NonZeroUsize,
/// default value for [`UnitConfig::max_in_flight`]
pub default_unit_max_in_flight: NonZeroUsize,
pub rob_size: NonZeroUsize,
}
impl CpuConfig {
@ -52,13 +52,12 @@ impl CpuConfig {
};
v
};
pub fn new(units: Vec<UnitConfig>, rob_size: NonZeroUsize) -> Self {
pub fn new(units: Vec<UnitConfig>) -> Self {
Self {
units,
out_reg_num_width: Self::DEFAULT_OUT_REG_NUM_WIDTH,
fetch_width: Self::DEFAULT_FETCH_WIDTH,
default_unit_max_in_flight: Self::DEFAULT_UNIT_MAX_IN_FLIGHT,
rob_size,
}
}
pub fn non_const_unit_nums(&self) -> std::ops::Range<usize> {
@ -79,9 +78,6 @@ impl CpuConfig {
pub fn p_reg_num_width(&self) -> usize {
self.unit_num_width() + self.out_reg_num_width
}
pub fn renamed_mop_in_unit(&self) -> RenamedMOp<UnitOutRegNum<DynSize>, DynSize> {
RenamedMOp[self.unit_out_reg_num()][self.p_reg_num_width()]
}
pub fn unit_output_write(&self) -> UnitOutputWrite<DynSize> {
UnitOutputWrite[self.out_reg_num_width]
}
@ -116,4 +112,13 @@ impl CpuConfig {
UnitToRegAlloc[mop_ty][extra_out_ty][self.unit_num_width()][self.out_reg_num_width]
[self.non_const_unit_nums().len()]
}
/// width in bits of a [`RetireQueueIndex`]: enough bits to count the
/// maximum number of in-flight instructions summed over all units
/// (rounded up to a power of two), plus 2 extra bits -- presumably
/// headroom so the wrapping comparison in `RetireQueueIndex::insns_until`
/// stays unambiguous; TODO confirm
pub fn retire_queue_index_width(&self) -> usize {
// total in-flight capacity across every unit
let max_in_flight: usize = (0..self.units.len())
.map(|unit_index| self.unit_max_in_flight(unit_index).get())
.sum();
2 + max_in_flight.next_power_of_two().ilog2() as usize
}
/// [`RetireQueueIndex`] type sized per [`Self::retire_queue_index_width`]
pub fn retire_queue_index(&self) -> RetireQueueIndex<DynSize> {
RetireQueueIndex[self.retire_queue_index_width()]
}
}

View file

@ -910,11 +910,9 @@ impl MOpRegNum {
//
// TODO: maybe add more registers later.
pub const FLAG_REG_NUMS: Range<u32> = 0xFE..0x100;
/// registers handled by a special small rename table (for flags and stuff, since it has more read/write ports)
pub const SPECIAL_REG_NUMS: Range<u32> = Self::FLAG_REG_NUMS;
/// registers handled by the large rename table for normal registers (has less read/write ports)
pub const NORMAL_REG_NUMS: Range<u32> =
Self::CONST_ZERO_REG_NUM + 1..Self::SPECIAL_REG_NUMS.start;
/// registers that aren't constants
pub const NON_CONST_REG_NUMS: Range<u32> =
Self::CONST_ZERO_REG_NUM + 1..Self::FLAG_REG_NUMS.end;
}
#[hdl(cmp_eq)]
@ -929,29 +927,6 @@ pub struct MOpDestReg {
pub flag_regs: Array<HdlOption<()>, { range_u32_len(&MOpRegNum::FLAG_REG_NUMS) }>,
}
#[derive(Copy, Clone, Eq, PartialEq, Ord, PartialOrd, Hash, Debug)]
pub enum RenameTableName {
/// the large rename table for normal registers (has less read/write ports)
Normal,
/// a special small rename table (for flags and stuff, since it has more read/write ports)
Special,
}
impl RenameTableName {
pub const fn reg_range(self) -> std::ops::Range<u32> {
match self {
Self::Normal => MOpRegNum::NORMAL_REG_NUMS,
Self::Special => MOpRegNum::SPECIAL_REG_NUMS,
}
}
pub const fn as_str(self) -> &'static str {
match self {
Self::Normal => "rename_table_normal",
Self::Special => "rename_table_special",
}
}
}
#[derive(Copy, Clone, Eq, PartialEq, Hash, Debug)]
pub enum MOpDestRegKind {
NormalReg {
@ -989,16 +964,13 @@ impl fmt::Display for MOpDestRegName {
}
impl MOpDestRegKind {
pub const fn reg_range(self) -> std::ops::Range<u32> {
pub const fn reg_num_range(self) -> std::ops::Range<u32> {
match self {
Self::NormalReg { .. } => MOpRegNum::NORMAL_REG_NUMS,
Self::FlagReg { .. } => MOpRegNum::FLAG_REG_NUMS,
}
}
pub const fn rename_table_names(self) -> &'static [RenameTableName] {
match self {
Self::NormalReg { .. } => &[RenameTableName::Normal, RenameTableName::Special],
Self::FlagReg { .. } => &[RenameTableName::Special],
Self::NormalReg { flag_reg_index: _ } => MOpRegNum::NON_CONST_REG_NUMS,
Self::FlagReg {
reg_num,
flag_reg_index: _,
} => reg_num..reg_num + 1,
}
}
pub fn fixed_reg_num(self) -> Option<u32> {

View file

@ -0,0 +1,262 @@
// SPDX-License-Identifier: LGPL-3.0-or-later
// See Notices.txt for copyright information
use crate::{
config::CpuConfig,
instruction::{MOp, MOpDestReg, MOpRegNum, MOpTrait, PRegNum, RenamedMOp},
reg_alloc::RetireQueueIndex,
rename_table::{rename_table, RenameTablePortConfig},
unit::UnitMOp,
util::array_vec::{ArrayVec, Length, ReadyValidArray},
};
use fayalite::{
prelude::*,
util::{prefix_sum::PrefixSumAlgorithm, ready_valid::ReadyValid},
};
/// destination of an instruction after rename: the retire-queue slot it
/// occupies plus the physical register it writes.
#[hdl]
pub struct RenamedInsnDest<UnitNumWidth: Size, OutRegNumWidth: Size, RetireQueueIndexWidth: Size> {
/// index of this instruction in the retire queue
pub retire_queue_index: RetireQueueIndex<RetireQueueIndexWidth>,
/// physical register allocated for this instruction's destination
pub p_reg_num: PRegNum<UnitNumWidth, OutRegNumWidth>,
}
impl CpuConfig {
/// [`RenamedInsnDest`] type with widths taken from this config
pub fn renamed_insn_dest(&self) -> RenamedInsnDest<DynSize, DynSize, DynSize> {
RenamedInsnDest[self.unit_num_width()][self.out_reg_num_width]
[self.retire_queue_index_width()]
}
/// [`RenamedMOp`] type as sent to the units, whose destination carries
/// a retire-queue index alongside the physical register number
pub fn renamed_mop_in_unit(
&self,
) -> RenamedMOp<RenamedInsnDest<DynSize, DynSize, DynSize>, DynSize> {
RenamedMOp[self.renamed_insn_dest()][self.p_reg_num_width()]
}
}
/// input to the `instruction_rename` stage: a macro-op (whose source
/// registers are still architectural and get looked up in the rename
/// table) plus the physical register already allocated for its destination.
#[hdl]
pub struct InstructionRenameInputInsn<UnitNumWidth: Size, OutRegNumWidth: Size> {
/// the decoded macro-op
pub mop: MOp,
/// pre-allocated physical destination register
pub renamed_dest: PRegNum<UnitNumWidth, OutRegNumWidth>,
}
impl CpuConfig {
/// [`InstructionRenameInputInsn`] type with widths from this config
pub fn instruction_rename_input_insn(&self) -> InstructionRenameInputInsn<DynSize, DynSize> {
InstructionRenameInputInsn[self.unit_num_width()][self.out_reg_num_width]
}
}
/// inclusive prefix-sum summary over the first `i + 1` slots of the
/// fetch group (combined with `PrefixSumAlgorithm` below)
#[hdl]
struct InsnsInPrefixSummary<FetchWidth: Size> {
/// whether every insn up to and including this position can be accepted
all_ready: Bool,
/// how many insns up to here are accepted this cycle
ready_count: Length<FetchWidth>,
/// how many retire-queue slots those insns consume
/// (transformed moves don't take a slot)
retire_queue_used: Length<FetchWidth>,
}
/// instruction-rename pipeline stage: looks up source registers in the
/// rename table, records destinations in it, assigns retire-queue
/// indexes to accepted instructions, and forwards the renamed macro-ops
/// toward the execution units.
#[hdl_module]
pub fn instruction_rename(config: &CpuConfig) {
#[hdl]
let cd: ClockDomain = m.input();
// fetch group coming in from fetch/decode
#[hdl]
let insns_in: ReadyValidArray<InstructionRenameInputInsn<DynSize, DynSize>, DynSize> =
m.input(ReadyValidArray[config.instruction_rename_input_insn()][config.fetch_width.get()]);
// retire-queue index of the first instruction accepted this cycle
#[hdl]
let start_retire_queue_index: RetireQueueIndex<DynSize> = m.input(config.retire_queue_index());
// retire-queue index just past the last instruction accepted this cycle
#[hdl]
let end_retire_queue_index: RetireQueueIndex<DynSize> = m.output(config.retire_queue_index());
#[hdl]
let insns_out: Array<
ReadyValid<RenamedMOp<RenamedInsnDest<DynSize, DynSize, DynSize>, DynSize>>,
> = m.output(
Array[ReadyValid[RenamedMOp[config.renamed_insn_dest()][config.p_reg_num_width()]]]
[config.fetch_width.get()],
);
// TODO: handle resetting table after cancelling instructions
// a slot can advance when its output is ready, or unconditionally when
// it's a transformed move (moves are completed here, not by a unit)
#[hdl]
let insns_ready_or_move = wire(Array[Bool][config.fetch_width.get()]);
for (insn_ready_or_move, insn_out) in insns_ready_or_move.into_iter().zip(insns_out) {
connect(insn_ready_or_move, insn_out.ready);
}
ArrayVec::for_each(insns_in.data, |fetch_index, input_insn| {
#[hdl]
match input_insn.mop {
UnitMOp::<_, _, _>::TransformedMove(_) => {
connect(insns_ready_or_move[fetch_index], true);
}
UnitMOp::<_, _, _>::AluBranch(_) | UnitMOp::<_, _, _>::LoadStore(_) => {}
}
});
let insns_in_prefix_summary_ty = InsnsInPrefixSummary[config.fetch_width.get()];
#[hdl]
let insns_in_prefix_summaries =
wire(Array[insns_in_prefix_summary_ty][config.fetch_width.get()]);
// inclusive prefix sum: summaries[i] covers fetch slots 0..=i
let insns_in_prefix_summaries_vec = PrefixSumAlgorithm::WorkEfficient.run(
(0..config.fetch_width.get()).map(|fetch_index| {
#[hdl]
let insns_in_prefix_summary_in = wire(insns_in_prefix_summary_ty);
#[hdl]
let InsnsInPrefixSummary::<_> {
all_ready,
ready_count,
retire_queue_used,
} = insns_in_prefix_summary_in;
// NOTE(review): reads insns_out[..].ready directly even though
// insns_ready_or_move was computed above -- confirm which is
// intended (WIP commit)
connect(all_ready, insns_out[fetch_index].ready);
connect(
ready_count,
Expr::ty(ready_count).cast_from_uint_unchecked(all_ready.cast_to(UInt[1])),
);
connect(retire_queue_used, Expr::ty(retire_queue_used).zero());
#[hdl]
if let HdlSome(input_insn) = ArrayVec::get(insns_in.data, fetch_index) {
// a valid insn uses a retire-queue slot iff it is accepted
connect(retire_queue_used, ready_count);
#[hdl]
match input_insn.mop {
UnitMOp::<_, _, _>::TransformedMove(_) => {
// moves complete at rename: always ready, no retire slot
connect(all_ready, true);
connect(retire_queue_used, Expr::ty(retire_queue_used).zero());
}
UnitMOp::<_, _, _>::AluBranch(_) | UnitMOp::<_, _, _>::LoadStore(_) => {}
}
}
insns_in_prefix_summary_in
}),
|l, r| {
// merge: the right half only counts while the left half is all ready,
// so acceptance stops at the first not-ready slot
#[hdl]
let insns_in_prefix_summary_merge = wire(insns_in_prefix_summary_ty);
#[hdl]
let InsnsInPrefixSummary::<_> {
all_ready,
ready_count,
retire_queue_used,
} = insns_in_prefix_summary_merge;
connect(all_ready, l.all_ready & r.all_ready);
#[hdl]
if l.all_ready {
connect(
ready_count,
Expr::ty(ready_count).cast_from_uint_unchecked(
Length::as_uint(l.ready_count) + Length::as_uint(r.ready_count),
),
);
connect(
retire_queue_used,
Expr::ty(retire_queue_used).cast_from_uint_unchecked(
Length::as_uint(l.retire_queue_used) + Length::as_uint(r.retire_queue_used),
),
);
} else {
connect(ready_count, l.ready_count);
connect(retire_queue_used, l.retire_queue_used);
}
insns_in_prefix_summary_merge
},
);
for (l, r) in insns_in_prefix_summaries
.into_iter()
.zip(insns_in_prefix_summaries_vec)
{
connect(l, r);
}
// accept as many insns as the whole-group summary says are ready
connect(
insns_in.ready,
insns_in_prefix_summaries[config.fetch_width.get() - 1].ready_count,
);
// retire_queue_indexes[i] is the index assigned to fetch slot i;
// the extra last element is the index just past this group
#[hdl]
let retire_queue_indexes =
wire(Array[config.retire_queue_index()][config.fetch_width.get() + 1]);
connect(retire_queue_indexes[0], start_retire_queue_index);
connect(
end_retire_queue_index,
retire_queue_indexes[config.fetch_width.get()],
);
for (retire_queue_index, insns_in_prefix_summary) in retire_queue_indexes
.into_iter()
.skip(1)
.zip(insns_in_prefix_summaries)
{
// summaries are inclusive prefix sums, so slot i + 1's index is
// start + slots used by insns 0..=i -- no element-to-element chain
connect_any(
retire_queue_index.index,
start_retire_queue_index.index
+ Length::as_uint(insns_in_prefix_summary.retire_queue_used),
);
}
// rename-table port budget: per fetch slot, one read port per possible
// source register and one write port per possible destination register
let mut port_configs = Vec::new();
let mut src_reg_count = 0;
MOpTrait::for_each_src_reg(MOp.uninit(), &mut |_, src_index| {
src_reg_count = src_reg_count.max(src_index + 1);
});
for _ in 0..config.fetch_width.get() {
for _ in 0..src_reg_count {
port_configs.push(RenameTablePortConfig::Read {
addr_range: MOpRegNum::NON_CONST_REG_NUMS,
});
}
for dest_reg_kind in MOpDestReg::REG_KINDS {
port_configs.push(RenameTablePortConfig::Write {
addr_range: dest_reg_kind.reg_num_range(),
});
}
}
#[hdl]
let rename_table = instance(rename_table(config, &port_configs));
connect(rename_table.cd, cd);
// default all ports to harmless values; overridden below for firing insns
for read_port in rename_table.read_ports {
connect_any(read_port.addr, 0_hdl_u0);
}
for write_port in rename_table.write_ports {
connect_any(write_port.addr, 0_hdl_u0);
connect_any(write_port.data, config.p_reg_num().const_zero());
}
let mut read_port_index = 0;
let mut write_port_index = 0;
ArrayVec::for_each(
ReadyValidArray::firing_data(insns_in),
|fetch_index, input_insn| {
#[hdl]
let InstructionRenameInputInsn::<_, _> { mop, renamed_dest } = input_insn;
let new_dest = #[hdl]
RenamedInsnDest::<_, _, _> {
retire_queue_index: retire_queue_indexes[fetch_index],
p_reg_num: renamed_dest,
};
// rewrite source regs via rename-table lookups and swap in new_dest
let insn_out = MOpTrait::map_regs(
mop,
new_dest,
config.p_reg_num_width(),
&mut |src_reg, src_index| {
connect(
rename_table.read_ports[read_port_index + src_index].addr,
src_reg.cast_bits_to(MOpRegNum),
);
rename_table.read_ports[read_port_index + src_index]
.data
.cast_to_bits()
},
);
// transformed moves are consumed at rename: force that variant to
// HdlNone so it is not forwarded to a unit
let insn_out = UnitMOp::try_with_transformed_move_op(
insn_out,
config.renamed_mop_in_unit().TransformedMove,
|v: Expr<HdlOption<_>>, _| connect(v, Expr::ty(v).HdlNone()),
);
connect(insns_out[fetch_index].data, insn_out);
read_port_index += src_reg_count;
// record the new physical reg for every architectural dest reg
for dest_reg in MOpDestReg::regs(MOpTrait::dest_reg(mop)) {
connect(rename_table.write_ports[write_port_index].addr, dest_reg);
connect(
rename_table.write_ports[write_port_index].data,
renamed_dest,
);
write_port_index += 1;
}
},
);
}

View file

@ -2,7 +2,9 @@
// See Notices.txt for copyright information
pub mod config;
pub mod instruction;
pub mod instruction_rename;
pub mod reg_alloc;
pub mod register;
pub mod rename_table;
pub mod unit;
pub mod util;

View file

@ -7,13 +7,13 @@ use crate::{
COMMON_MOP_SRC_LEN,
},
unit::{
unit_base::{UnitForwardingInfo, UnitInput},
GlobalState, TrapData, UnitMOp, UnitOutput, UnitOutputWrite, UnitResult,
UnitResultCompleted, UnitTrait,
unit_base::UnitInput, GlobalState, TrapData, UnitMOp, UnitOutput, UnitOutputWrite,
UnitResult, UnitResultCompleted, UnitTrait,
},
util::tree_reduce::tree_reduce_with_state,
util::array_vec::ReadyValidArray,
};
use fayalite::{
int::BoolOrIntType,
memory::{splat_mask, WriteStruct},
module::{instance_with_loc, memory_with_loc, wire_with_loc},
prelude::*,
@ -44,147 +44,30 @@ pub enum FetchDecodeSpecialOp {
#[hdl]
pub struct FetchDecodeInterface<FetchWidth: Size> {
pub decoded_insns: ArrayType<ReadyValid<FetchedDecodedMOp>, FetchWidth>,
pub decoded_insns: ReadyValidArray<FetchedDecodedMOp, FetchWidth>,
#[hdl(flip)]
pub fetch_decode_special_op: ReadyValid<FetchDecodeSpecialOp>,
}
#[hdl]
struct ROBRenamedInsn<UnitNumWidth: Size, OutRegNumWidth: Size> {
mop_dest: MOpDestReg,
p_dest: PRegNum<UnitNumWidth, OutRegNumWidth>,
/// index into the retire queue (the virtual queue of instructions that haven't yet retired)
#[hdl(cmp_eq)]
pub struct RetireQueueIndex<Width: Size> {
/// increases by one for each instruction added to the retire queue.
///
/// this wraps around, so you must not compare it using `cmp_lt`/`cmp_gt`
/// but instead must use [`Self::insns_until`] and compare the output with zero.
pub index: UIntType<Width>,
}
#[hdl]
struct ROBEntry<UnitNumWidth: Size, OutRegNumWidth: Size> {
renamed_insn: ROBRenamedInsn<UnitNumWidth, OutRegNumWidth>,
dest_written: Bool,
}
#[hdl_module]
fn rob(config: &CpuConfig) {
#[hdl]
let cd: ClockDomain = m.input();
#[hdl]
let renamed_insns_in: Array<ReadyValid<ROBRenamedInsn<DynSize, DynSize>>> = m.input(
Array[ReadyValid[ROBRenamedInsn[config.unit_num_width()][config.out_reg_num_width]]]
[config.fetch_width.get()],
);
#[hdl]
let unit_forwarding_info: UnitForwardingInfo<DynSize, DynSize, DynSize> =
m.input(config.unit_forwarding_info());
let rob_entry_ty = ROBEntry[config.unit_num_width()][config.out_reg_num_width];
#[hdl]
let rob = reg_builder()
.clock_domain(cd)
.no_reset(Array[rob_entry_ty][config.rob_size.get()]);
#[hdl]
let rob_valid_start = reg_builder()
.clock_domain(cd)
.reset(UInt::range(0..config.rob_size.get()).zero());
#[hdl]
let rob_valid_end = reg_builder()
.clock_domain(cd)
.reset(UInt::range(0..config.rob_size.get()).zero());
#[hdl]
let free_space = wire(UInt::range_inclusive(0..=config.rob_size.get()));
#[hdl]
if rob_valid_end.cmp_lt(rob_valid_start) {
// rob_valid_end wrapped around but start didn't
connect_any(
free_space,
rob_valid_end + config.rob_size.get() - rob_valid_start,
);
} else {
connect_any(free_space, rob_valid_end - rob_valid_start);
}
struct IndexAndRange {
index: Expr<UInt>,
range: std::ops::Range<usize>,
}
let mut next_write_index = IndexAndRange {
index: rob_valid_end,
range: 0..config.rob_size.get(),
};
for fetch_index in 0..config.fetch_width.get() {
let write_index = next_write_index;
let next_write_index_range = write_index.range.start..write_index.range.end + 1;
next_write_index = IndexAndRange {
index: wire_with_loc(
&format!("next_write_index_{fetch_index}"),
SourceLocation::caller(),
UInt::range(next_write_index_range.clone()),
),
range: next_write_index_range,
};
connect(
renamed_insns_in[fetch_index].ready,
fetch_index.cmp_lt(free_space),
);
#[hdl]
if let HdlSome(renamed_insn) = ReadyValid::firing_data(renamed_insns_in[fetch_index]) {
for i in write_index.range.clone() {
#[hdl]
if write_index.index.cmp_eq(i) {
connect(
rob[i % config.rob_size.get()],
#[hdl]
ROBEntry {
renamed_insn,
dest_written: false,
},
);
}
}
}
// TODO: optimize write_index chain better
connect_any(
next_write_index.index,
write_index.index
+ ReadyValid::firing(renamed_insns_in[fetch_index]).cast_to_static::<UInt<1>>(),
);
}
assert!(
config.rob_size >= config.fetch_width,
"rob_size ({}) is too small for fetch_width = {} -- next_write_index would overflow",
config.rob_size,
config.fetch_width,
);
#[hdl]
if next_write_index.index.cmp_lt(config.rob_size.get()) {
connect_any(rob_valid_end, next_write_index.index);
} else {
connect_any(
rob_valid_end,
next_write_index.index - config.rob_size.get(),
);
}
// TODO: optimize better, O(rob_size * unit_count) is too big here
for rob_index in 0..config.rob_size.get() {
for unit_index in 0..config.non_const_unit_nums().len() {
#[hdl]
if let HdlSome(unit_output_write) = unit_forwarding_info.unit_output_writes[unit_index]
{
#[hdl]
let UnitOutputWrite::<_> {
which: unit_out_reg,
value: _,
} = unit_output_write;
let p_reg_num = #[hdl]
PRegNum::<_, _> {
unit_num: config.unit_num().from_index(unit_index),
unit_out_reg,
};
#[hdl]
if rob[rob_index].renamed_insn.p_dest.cmp_eq(p_reg_num) {
connect(rob[rob_index].dest_written, true);
}
}
}
impl<Width: Size> RetireQueueIndex<Width> {
/// computes the wrapping difference `this - target` as a signed value;
/// compare the result with zero instead of using `cmp_lt`/`cmp_gt`
/// directly on the indexes, since they wrap around.
pub fn insns_until(
this: impl ToExpr<Type = Self>,
target: impl ToExpr<Type = Self>,
) -> Expr<SIntType<Width>> {
let this = this.to_expr();
let target = target.to_expr();
// both indexes must have the same width for the subtraction to be valid
assert_eq!(Expr::ty(this), Expr::ty(target));
(this.index - target.index).cast_to(Expr::ty(this).index.as_same_width_sint())
}
}
@ -205,10 +88,6 @@ pub fn reg_alloc(config: &CpuConfig) {
);
// TODO: finish
#[hdl]
let rob = instance(rob(config));
connect(rob.cd, cd);
let mut rename_table_mems = BTreeMap::<RenameTableName, MemBuilder<_>>::new();
for reg_kind in MOpDestReg::REG_KINDS {
@ -238,11 +117,6 @@ pub fn reg_alloc(config: &CpuConfig) {
#[hdl]
let renamed_mops_out_reg = wire(Array[HdlOption[config.p_reg_num()]][config.fetch_width.get()]);
for fetch_index in 0..config.fetch_width.get() {
// TODO: finish
connect(
rob.renamed_insns_in[fetch_index].data,
Expr::ty(rob).renamed_insns_in.element().data.HdlNone(),
);
// TODO: finish
connect(
fetch_decode_interface.decoded_insns[fetch_index].ready,
@ -483,7 +357,6 @@ pub fn reg_alloc(config: &CpuConfig) {
);
#[hdl]
let unit_forwarding_info = wire(config.unit_forwarding_info());
connect(rob.unit_forwarding_info, unit_forwarding_info);
for (unit_index, unit_config) in config.units.iter().enumerate() {
let dyn_unit = unit_config.kind.unit(config, unit_index);
let unit = instance_with_loc(

View file

@ -0,0 +1,187 @@
// SPDX-License-Identifier: LGPL-3.0-or-later
// See Notices.txt for copyright information
use crate::{
config::CpuConfig,
instruction::{MOpRegNum, PRegNum},
util::range_intersection,
};
use fayalite::{
memory::{splat_mask, ReadStruct, WriteStruct},
module::memory_with_loc,
prelude::*,
};
use std::{mem, ops::Range};
/// rename-table read port: looks up the physical register currently
/// mapped to architectural register `addr`.
#[hdl]
pub struct RenameTableReadPort<UnitNumWidth: Size, OutRegNumWidth: Size> {
/// architectural (macro-op) register number to look up
pub addr: MOpRegNum,
/// physical register mapped to `addr` (flipped: driven by the table)
#[hdl(flip)]
pub data: PRegNum<UnitNumWidth, OutRegNumWidth>,
}
/// rename-table write port: maps architectural register `addr` to
/// physical register `data`.
#[hdl]
pub struct RenameTableWritePort<UnitNumWidth: Size, OutRegNumWidth: Size> {
/// architectural (macro-op) register number to update
pub addr: MOpRegNum,
/// physical register to record for `addr`
pub data: PRegNum<UnitNumWidth, OutRegNumWidth>,
}
/// configuration for one rename-table port: the architectural register
/// address range it may access (the port is only wired to the memories
/// backing that range).
#[derive(Clone, Debug)]
pub enum RenameTablePortConfig {
Read { addr_range: Range<u32> },
Write { addr_range: Range<u32> },
}
/// register rename table.
/// all read/write operations are done in the order of `port_configs`.
/// So if `port_configs[0]` is a write and `port_configs[1]` is a read,
/// then the read port will combinatorially return data written by the
/// write port in the *same* clock cycle. However, if `port_configs[0]`
/// is a read and `port_configs[1]` is a write, then the read port will
/// not see the data written by the write port until the *next* clock cycle.
#[hdl_module]
pub fn rename_table(config: &CpuConfig, port_configs: &[RenameTablePortConfig]) {
let read_count = port_configs
.iter()
.filter(|v| matches!(v, RenameTablePortConfig::Read { .. }))
.count();
let write_count = port_configs
.iter()
.filter(|v| matches!(v, RenameTablePortConfig::Write { .. }))
.count();
#[hdl]
let cd: ClockDomain = m.input();
#[hdl]
let read_ports: Array<RenameTableReadPort<DynSize, DynSize>> = m.input(
Array[RenameTableReadPort[config.unit_num_width()][config.out_reg_num_width]][read_count],
);
#[hdl]
let write_ports: Array<RenameTableWritePort<DynSize, DynSize>> = m.input(
Array[RenameTableWritePort[config.unit_num_width()][config.out_reg_num_width]][write_count],
);
// default: reads of addresses outside every covered sub-range return
// the all-zero physical register number
for read_port in read_ports {
connect(read_port.data, config.p_reg_num().const_zero());
}
// pair each port config with its index into read_ports/write_ports
// (reads and writes are numbered independently)
let port_configs_and_indexes = port_configs.iter().scan(
(0usize, 0),
|(read_port_index, write_port_index), port_config| {
Some((
port_config,
match port_config {
RenameTablePortConfig::Read { .. } => {
mem::replace(read_port_index, *read_port_index + 1)
}
RenameTablePortConfig::Write { .. } => {
mem::replace(write_port_index, *write_port_index + 1)
}
},
))
},
);
// split the address space at every configured range boundary; each
// resulting sub-range is backed by its own memory, so a port only gets
// wired to the memories its configured range overlaps
let mut range_transitions = Vec::with_capacity(port_configs.len() * 2);
for port_config in port_configs {
let (RenameTablePortConfig::Read { addr_range }
| RenameTablePortConfig::Write { addr_range }) = port_config;
range_transitions.push(addr_range.start);
range_transitions.push(addr_range.end);
}
range_transitions.sort_unstable();
range_transitions.dedup();
let mut last_range_transition = None;
for range_transition in range_transitions {
// each adjacent pair of transitions forms one sub-range
let Some(last_range_transition) = last_range_transition.replace(range_transition) else {
continue;
};
let cur_addr_range = last_range_transition..range_transition;
let mut mem = memory_with_loc(
&if cur_addr_range.len() == 1 {
format!("mem_{:#x}", cur_addr_range.start)
} else {
format!("mem_{:#x}_{:#x}", cur_addr_range.start, cur_addr_range.end)
},
config.p_reg_num(),
SourceLocation::caller(),
);
mem.depth(cur_addr_range.len());
// single-entry memories use an equality test instead of a range check
let addr_in_range = |addr: Expr<MOpRegNum>| {
if cur_addr_range.len() == 1 {
addr.value.cmp_eq(cur_addr_range.start)
} else {
addr.value.cmp_ge(cur_addr_range.start) & addr.value.cmp_lt(cur_addr_range.end)
}
};
// attach a memory port for every configured port whose range
// overlaps this sub-range; addresses are rebased to the sub-range
for (port_config, port_index) in port_configs_and_indexes.clone() {
match port_config {
RenameTablePortConfig::Read { addr_range } => {
if range_intersection(&addr_range, &cur_addr_range).is_none() {
continue;
}
let port = read_ports[port_index];
#[hdl]
let ReadStruct::<_, _> {
addr,
en,
clk,
data,
} = mem.new_read_port();
connect_any(addr, port.addr.value - cur_addr_range.start);
connect(en, addr_in_range(port.addr));
connect(clk, cd.clk);
#[hdl]
if en {
connect(port.data, data);
}
}
RenameTablePortConfig::Write { addr_range } => {
if range_intersection(&addr_range, &cur_addr_range).is_none() {
continue;
}
let port = write_ports[port_index];
#[hdl]
let WriteStruct::<_, _> {
addr,
en,
clk,
data,
mask,
} = mem.new_write_port();
connect_any(addr, port.addr.value - cur_addr_range.start);
connect(en, addr_in_range(port.addr));
connect(clk, cd.clk);
connect(data, port.data);
connect(mask, splat_mask(Expr::ty(port).data, true.to_expr()));
}
}
}
}
// same-cycle write-to-read forwarding: a read port sees the data of any
// *earlier* (in `port_configs` order) write port with an overlapping
// range and matching address, implementing the ordering documented above
for (port_config_index, (port_config, port_index)) in
port_configs_and_indexes.clone().enumerate()
{
let RenameTablePortConfig::Read { addr_range } = port_config else {
continue;
};
let port = read_ports[port_index];
for (prev_port_config, prev_port_index) in
port_configs_and_indexes.clone().take(port_config_index)
{
let RenameTablePortConfig::Write {
addr_range: prev_addr_range,
} = prev_port_config
else {
continue;
};
if range_intersection(addr_range, prev_addr_range).is_none() {
continue;
}
let prev_port = write_ports[prev_port_index];
#[hdl]
if prev_port.addr.cmp_eq(port.addr) {
connect(port.data, prev_port.data);
}
}
}
}

View file

@ -2,7 +2,6 @@
// See Notices.txt for copyright information
pub mod array_vec;
pub mod tree_reduce;
pub(crate) const fn range_u32_len(range: &std::ops::Range<u32>) -> usize {
let retval = range.end.saturating_sub(range.start);
@ -25,3 +24,16 @@ pub(crate) const fn range_u32_nth_or_panic(range: &std::ops::Range<u32>, index:
panic!("index out of range")
}
}
/// Computes the overlap of two `u32` ranges, or `None` when they do not
/// intersect (an empty overlap counts as no intersection).
pub(crate) const fn range_intersection(
    a: &std::ops::Range<u32>,
    b: &std::ops::Range<u32>,
) -> Option<std::ops::Range<u32>> {
    // overlap is [max(starts), min(ends)); spelled with `if` because
    // `Ord::max`/`Ord::min` aren't callable in this const context
    let lo = if b.start > a.start { b.start } else { a.start };
    let hi = if b.end < a.end { b.end } else { a.end };
    if lo < hi {
        Some(lo..hi)
    } else {
        None
    }
}

View file

@ -2,11 +2,15 @@
// See Notices.txt for copyright information
use fayalite::{
expr::ops::{ExprCastTo, ExprIndex, ExprPartialEq, ExprPartialOrd},
int::SizeType,
expr::{
ops::{ExprCastTo, ExprIndex, ExprPartialEq, ExprPartialOrd},
ToLiteralBits,
},
int::{IntType, SizeType},
intern::{Intern, Interned},
prelude::*,
ty::{MatchVariantWithoutScope, StaticType, TypeProperties},
util::ConstBool,
};
use std::{marker::PhantomData, ops::Index};
@ -249,6 +253,29 @@ impl<T: Type, N: Size> ArrayVec<T, N> {
});
array_vec_as_array_of_options
}
/// dynamically index `this`, yielding `HdlSome(element)` when `index` is
/// less than the current length and `HdlNone` otherwise.
#[hdl]
pub fn get<Idx: IntType<Dyn = UInt>>(
this: impl ToExpr<Type = Self>,
index: impl ToExpr<Type = Idx>,
) -> Expr<HdlOption<T>> {
let this = this.to_expr();
let index = Expr::as_dyn_int(index.to_expr());
// if index is a constant that can never be within capacity,
// short-circuit to HdlNone without building the indexing hardware
let never_in_bounds = index.cmp_ge(Expr::ty(this).capacity());
if let Ok(never_in_bounds) = never_in_bounds.to_literal_bits() {
if never_in_bounds[0] {
// avoid error from out-of-bounds constant index
return HdlOption[Expr::ty(this).element()].HdlNone();
}
}
#[hdl]
let array_vec_get = wire(HdlOption[Expr::ty(this).element()]);
connect(array_vec_get, Expr::ty(array_vec_get).HdlNone());
#[hdl]
if index.cmp_lt(Length::as_uint(Self::len(this))) {
connect(array_vec_get, HdlSome(this.elements[index]));
}
array_vec_get
}
}
impl<T: Type, N: Size, Idx, IdxWidth: Size> ExprIndex<Idx> for ArrayVec<T, N>
@ -263,3 +290,35 @@ where
<ArrayType<T, N> as ExprIndex<Idx>>::expr_index(&this.elements, index)
}
}
/// ready/valid handshake carrying a variable-length array of up to `N`
/// elements per transfer.
#[hdl]
pub struct ReadyValidArray<T: Type, N: Size> {
/// elements offered by the producer (with a valid length)
pub data: ArrayVec<T, N>,
/// number of elements the consumer can accept (flipped: consumer-driven)
#[hdl(flip)]
pub ready: Length<N>,
}
impl<T: Type, N: Size> ReadyValidArray<T, N> {
/// number of elements actually transferred: `min(data.len, ready)`.
#[hdl]
pub fn firing_len(this: impl ToExpr<Type = Self>) -> Expr<Length<N>> {
let this = this.to_expr();
assert_eq!(Expr::ty(this).data.len_ty(), Expr::ty(this).ready);
// NOTE(review): uses `.data.len` here but `.data.len_ty()` in the
// assert above -- assuming both denote the length type; confirm
#[hdl]
let firing_len = wire(Expr::ty(this).data.len);
connect(firing_len, this.data.len);
#[hdl]
if this.data.len.cmp_gt(this.ready) {
connect(firing_len, this.ready);
}
firing_len
}
/// the transferred elements: `data` with its length clamped to
/// [`Self::firing_len`].
#[hdl]
pub fn firing_data(this: impl ToExpr<Type = Self>) -> Expr<ArrayVec<T, N>> {
let this = this.to_expr();
#[hdl]
let firing_data = wire(Expr::ty(this).data);
connect(firing_data, this.data);
connect(firing_data.len, Self::firing_len(this));
firing_data
}
}

View file

@ -1,152 +0,0 @@
// SPDX-License-Identifier: LGPL-3.0-or-later
// See Notices.txt for copyright information
/// one step of a tree-reduction schedule.
#[derive(Copy, Clone, PartialEq, Eq, Hash, Debug)]
pub enum TreeReduceOp {
/// consume the next input element (pushing it as a 1-element subtree)
Input,
/// combine the two most recently produced partial results
Reduce,
}
/// a completed subtree on the reduction stack.
#[derive(Copy, Clone, Debug)]
struct Entry {
// index of the first input covered by this subtree
start: usize,
// subtree height: a full subtree at this depth covers `1 << depth` inputs
depth: u32,
}
/// iterator producing the [`TreeReduceOp`] schedule that reduces `len`
/// inputs as a balanced binary tree.
#[derive(Clone, Debug)]
pub struct TreeReduceOps {
// total number of inputs to schedule
len: usize,
// stack of completed subtrees not yet merged, leftmost first
stack: Vec<Entry>,
}
impl TreeReduceOps {
pub fn new(len: usize) -> Self {
TreeReduceOps {
len,
stack: Vec::new(),
}
}
}
impl Iterator for TreeReduceOps {
type Item = TreeReduceOp;
fn next(&mut self) -> Option<Self::Item> {
// the stack holds completed subtrees left-to-right; merge whenever the
// top two subtrees have equal depth, otherwise take another input
match *self.stack {
// first call with nonempty input: take input 0
[] if self.len != 0 => {
self.stack.push(Entry { start: 0, depth: 0 });
Some(TreeReduceOp::Input)
}
// top two subtrees are the same size: merge them into one of depth + 1
[.., ref mut second_last, last] if second_last.depth == last.depth => {
second_last.depth += 1;
self.stack.pop();
Some(TreeReduceOp::Reduce)
}
// inputs remain past the end of the top subtree: take the next one
[.., last] if self.len - last.start > 1 << last.depth => {
let start = last.start + (1 << last.depth);
self.stack.push(Entry { start, depth: 0 });
Some(TreeReduceOp::Input)
}
// no inputs left: merge the remaining unequal-depth subtrees
[.., ref mut second_last, _] => {
second_last.depth += 1;
self.stack.pop();
Some(TreeReduceOp::Reduce)
}
// fully reduced to a single subtree, or len == 0: schedule done
_ => None,
}
}
}
/// Tree-reduces `iter` while threading mutable `state` through every
/// `input`/`reduce` call; returns `None` for an empty iterator.
///
/// The reduction order follows [`TreeReduceOps`], combining elements as a
/// balanced binary tree rather than a left fold.
#[track_caller]
pub fn tree_reduce_with_state<S, I, R>(
    iter: impl IntoIterator<IntoIter: ExactSizeIterator, Item = I>,
    state: &mut S,
    mut input: impl FnMut(&mut S, I) -> R,
    mut reduce: impl FnMut(&mut S, R, R) -> R,
) -> Option<R> {
    let mut iter = iter.into_iter();
    // partial results; TreeReduceOps guarantees the stack discipline, so
    // every Reduce finds at least two entries here
    let mut partials: Vec<R> = Vec::new();
    for op in TreeReduceOps::new(iter.len()) {
        match op {
            TreeReduceOp::Input => {
                let item = iter.next().expect("inconsistent iterator len() and next()");
                partials.push(input(state, item));
            }
            TreeReduceOp::Reduce => {
                let (Some(r), Some(l)) = (partials.pop(), partials.pop()) else {
                    unreachable!();
                };
                partials.push(reduce(state, l, r));
            }
        }
    }
    partials.pop()
}
/// Tree-reduces `iter` with `reduce`, returning `None` when empty.
///
/// Stateless convenience wrapper around [`tree_reduce_with_state`].
pub fn tree_reduce<T>(
    iter: impl IntoIterator<Item = T, IntoIter: ExactSizeIterator>,
    mut reduce: impl FnMut(T, T) -> T,
) -> Option<T> {
    let mut no_state = ();
    tree_reduce_with_state(
        iter,
        &mut no_state,
        |_state, v| v,
        move |_state, l, r| reduce(l, r),
    )
}
#[cfg(test)]
mod tests {
use super::*;
use std::ops::Range;
// reference schedule: the left subtree covers the largest power-of-two
// half (next_power_of_two(len) / 2 inputs), the right subtree the rest
fn recursive_tree_reduce(range: Range<usize>, ops: &mut Vec<TreeReduceOp>) {
if range.len() == 1 {
ops.push(TreeReduceOp::Input);
return;
}
if range.is_empty() {
return;
}
let pow2_len = range.len().next_power_of_two();
let split = range.start + pow2_len / 2;
recursive_tree_reduce(range.start..split, ops);
recursive_tree_reduce(split..range.end, ops);
ops.push(TreeReduceOp::Reduce);
}
#[test]
fn test_tree_reduce() {
// hand-checked schedules for input counts 0..=8
const EXPECTED: &'static [&'static [TreeReduceOp]] = {
use TreeReduceOp::{Input as I, Reduce as R};
&[
&[],
&[I],
&[I, I, R],
&[I, I, R, I, R],
&[I, I, R, I, I, R, R],
&[I, I, R, I, I, R, R, I, R],
&[I, I, R, I, I, R, R, I, I, R, R],
&[I, I, R, I, I, R, R, I, I, R, I, R, R],
&[I, I, R, I, I, R, R, I, I, R, I, I, R, R, R],
]
};
for len in 0..64 {
// the recursive reference, the iterative TreeReduceOps schedule,
// and (for small len) the hand-written table must all agree
let mut expected = vec![];
recursive_tree_reduce(0..len, &mut expected);
if let Some(&expected2) = EXPECTED.get(len) {
assert_eq!(*expected, *expected2, "len={len}");
}
assert_eq!(
TreeReduceOps::new(len).collect::<Vec<_>>(),
expected,
"len={len}"
);
// reducing singleton vecs by concatenation must reproduce the
// input sequence in order (checks operand order is preserved)
let seq: Vec<_> = (0..len).collect();
assert_eq!(
seq,
tree_reduce(seq.iter().map(|&v| vec![v]), |mut l, r| {
l.extend_from_slice(&r);
l
})
.unwrap_or_default()
);
}
}
}