WIP: splitting reg_alloc
Some checks failed
/ deps (push) Successful in 14s
/ test (push) Failing after 2m7s

This commit is contained in:
Jacob Lifshay 2025-03-05 23:50:38 -08:00
parent ea72988f74
commit 3a649c8997
Signed by: programmerjake
SSH key fingerprint: SHA256:HnFTLGpSm4Q4Fj502oCFisjZSoakwEuTsJJMSke63RQ
15 changed files with 818 additions and 543 deletions

View file

@ -16,3 +16,6 @@ version.workspace = true
[dependencies]
fayalite.workspace = true
[lints.rust]
unexpected_cfgs = { level = "warn", check-cfg = ['cfg(todo)'] }

View file

@ -3,8 +3,9 @@
use crate::{
instruction::{MOpTrait, PRegNum, RenamedMOp, UnitNum, UnitOutRegNum, CONST_ZERO_UNIT_NUM},
unit::{
unit_base::{UnitForwardingInfo, UnitToRegAlloc},
UnitCancelInput, UnitKind, UnitOutputWrite,
unit_base::{ExecuteEnd, ExecuteStart},
RenamedInsnData, RetireQueueIndex, UnitForwardingInfo, UnitKind, UnitOutputWrite,
UnitToRegAlloc,
},
};
use fayalite::prelude::*;
@ -35,7 +36,6 @@ pub struct CpuConfig {
pub fetch_width: NonZeroUsize,
/// default value for [`UnitConfig::max_in_flight`]
pub default_unit_max_in_flight: NonZeroUsize,
pub rob_size: NonZeroUsize,
}
impl CpuConfig {
@ -52,13 +52,12 @@ impl CpuConfig {
};
v
};
pub fn new(units: Vec<UnitConfig>, rob_size: NonZeroUsize) -> Self {
pub fn new(units: Vec<UnitConfig>) -> Self {
Self {
units,
out_reg_num_width: Self::DEFAULT_OUT_REG_NUM_WIDTH,
fetch_width: Self::DEFAULT_FETCH_WIDTH,
default_unit_max_in_flight: Self::DEFAULT_UNIT_MAX_IN_FLIGHT,
rob_size,
}
}
pub fn non_const_unit_nums(&self) -> std::ops::Range<usize> {
@ -79,8 +78,8 @@ impl CpuConfig {
pub fn p_reg_num_width(&self) -> usize {
self.unit_num_width() + self.out_reg_num_width
}
pub fn renamed_mop_in_unit(&self) -> RenamedMOp<UnitOutRegNum<DynSize>, DynSize> {
RenamedMOp[self.unit_out_reg_num()][self.p_reg_num_width()]
pub fn renamed_mop(&self) -> RenamedMOp<DynSize> {
RenamedMOp[self.p_reg_num_width()]
}
pub fn unit_output_write(&self) -> UnitOutputWrite<DynSize> {
UnitOutputWrite[self.out_reg_num_width]
@ -88,9 +87,6 @@ impl CpuConfig {
pub fn unit_output_writes(&self) -> Array<HdlOption<UnitOutputWrite<DynSize>>> {
Array[HdlOption[self.unit_output_write()]][self.non_const_unit_nums().len()]
}
pub fn unit_cancel_input(&self) -> UnitCancelInput<DynSize> {
UnitCancelInput[self.out_reg_num_width]
}
pub fn unit_forwarding_info(&self) -> UnitForwardingInfo<DynSize, DynSize, DynSize> {
UnitForwardingInfo[self.unit_num_width()][self.out_reg_num_width]
[self.non_const_unit_nums().len()]
@ -101,19 +97,39 @@ impl CpuConfig {
.unwrap_or(self.default_unit_max_in_flight)
}
pub fn unit_to_reg_alloc<
MOp: Type + MOpTrait<DestReg = UnitOutRegNum<DynSize>, SrcRegWidth = DynSize>,
MOp: Type + MOpTrait<DestReg = (), SrcRegWidth = DynSize>,
ExtraOut: Type,
>(
&self,
mop_ty: MOp,
extra_out_ty: ExtraOut,
) -> UnitToRegAlloc<MOp, ExtraOut, DynSize, DynSize, DynSize> {
assert_eq!(
mop_ty.dest_reg_ty(),
self.unit_out_reg_num(),
"inconsistent types",
);
) -> UnitToRegAlloc<MOp, ExtraOut, DynSize, DynSize, DynSize, DynSize> {
UnitToRegAlloc[mop_ty][extra_out_ty][self.unit_num_width()][self.out_reg_num_width]
[self.non_const_unit_nums().len()]
[self.non_const_unit_nums().len()][self.retire_queue_index_width()]
}
/// bit width used for [`RetireQueueIndex`].
///
/// computed as two bits more than the base-2 logarithm of the sum of
/// [`Self::unit_max_in_flight`] over all units, after rounding that sum up to a power of two.
pub fn retire_queue_index_width(&self) -> usize {
    let mut total_max_in_flight = 0usize;
    for unit_index in 0..self.units.len() {
        total_max_in_flight += self.unit_max_in_flight(unit_index).get();
    }
    let log2_rounded_up = total_max_in_flight.next_power_of_two().ilog2() as usize;
    2 + log2_rounded_up
}
/// the [`RetireQueueIndex`] type whose width is [`Self::retire_queue_index_width`]
pub fn retire_queue_index(&self) -> RetireQueueIndex<DynSize> {
    let index_width = self.retire_queue_index_width();
    RetireQueueIndex[index_width]
}
/// the [`RenamedInsnData`] type for the given `mop` and `dest` types, with its
/// retire queue index sized by [`Self::retire_queue_index_width`]
pub fn renamed_insn_data<MOp: Type, DestReg: Type>(
    &self,
    mop: MOp,
    dest: DestReg,
) -> RenamedInsnData<MOp, DestReg, DynSize> {
    let index_width = self.retire_queue_index_width();
    RenamedInsnData[mop][dest][index_width]
}
/// the [`ExecuteStart`] type for the given `mop` type, parameterized by this
/// config's `out_reg_num_width` and [`Self::retire_queue_index_width`]
pub fn execute_start<MOp: Type>(&self, mop: MOp) -> ExecuteStart<MOp, DynSize, DynSize> {
    let index_width = self.retire_queue_index_width();
    ExecuteStart[mop][self.out_reg_num_width][index_width]
}
/// the [`ExecuteEnd`] type for the given `extra_out_ty`, parameterized by this
/// config's `out_reg_num_width` and [`Self::retire_queue_index_width`]
pub fn execute_end<ExtraOut: Type>(
    &self,
    extra_out_ty: ExtraOut,
) -> ExecuteEnd<DynSize, DynSize, ExtraOut> {
    let index_width = self.retire_queue_index_width();
    ExecuteEnd[self.out_reg_num_width][index_width][extra_out_ty]
}
}

View file

@ -910,11 +910,9 @@ impl MOpRegNum {
//
// TODO: maybe add more registers later.
pub const FLAG_REG_NUMS: Range<u32> = 0xFE..0x100;
/// registers handled by a special small rename table (for flags and stuff, since it has more read/write ports)
pub const SPECIAL_REG_NUMS: Range<u32> = Self::FLAG_REG_NUMS;
/// registers handled by the large rename table for normal registers (has less read/write ports)
pub const NORMAL_REG_NUMS: Range<u32> =
Self::CONST_ZERO_REG_NUM + 1..Self::SPECIAL_REG_NUMS.start;
/// registers that aren't constants
pub const NON_CONST_REG_NUMS: Range<u32> =
Self::CONST_ZERO_REG_NUM + 1..Self::FLAG_REG_NUMS.end;
}
#[hdl(cmp_eq)]
@ -929,29 +927,6 @@ pub struct MOpDestReg {
pub flag_regs: Array<HdlOption<()>, { range_u32_len(&MOpRegNum::FLAG_REG_NUMS) }>,
}
#[derive(Copy, Clone, Eq, PartialEq, Ord, PartialOrd, Hash, Debug)]
pub enum RenameTableName {
/// the large rename table for normal registers (has less read/write ports)
Normal,
/// a special small rename table (for flags and stuff, since it has more read/write ports)
Special,
}
impl RenameTableName {
pub const fn reg_range(self) -> std::ops::Range<u32> {
match self {
Self::Normal => MOpRegNum::NORMAL_REG_NUMS,
Self::Special => MOpRegNum::SPECIAL_REG_NUMS,
}
}
pub const fn as_str(self) -> &'static str {
match self {
Self::Normal => "rename_table_normal",
Self::Special => "rename_table_special",
}
}
}
#[derive(Copy, Clone, Eq, PartialEq, Hash, Debug)]
pub enum MOpDestRegKind {
NormalReg {
@ -989,16 +964,13 @@ impl fmt::Display for MOpDestRegName {
}
impl MOpDestRegKind {
pub const fn reg_range(self) -> std::ops::Range<u32> {
pub const fn reg_num_range(self) -> std::ops::Range<u32> {
match self {
Self::NormalReg { .. } => MOpRegNum::NORMAL_REG_NUMS,
Self::FlagReg { .. } => MOpRegNum::FLAG_REG_NUMS,
}
}
pub const fn rename_table_names(self) -> &'static [RenameTableName] {
match self {
Self::NormalReg { .. } => &[RenameTableName::Normal, RenameTableName::Special],
Self::FlagReg { .. } => &[RenameTableName::Special],
Self::NormalReg { dest_reg_index: _ } => MOpRegNum::NON_CONST_REG_NUMS,
Self::FlagReg {
reg_num,
flag_reg_index: _,
} => reg_num..reg_num + 1,
}
}
pub fn fixed_reg_num(self) -> Option<u32> {
@ -1091,5 +1063,5 @@ pub type MOp = UnitMOp<
>;
#[hdl]
pub type RenamedMOp<DestReg: Type, SrcRegWidth: Size> =
UnitMOp<DestReg, SrcRegWidth, L2RegisterFileMOp<DestReg, SrcRegWidth>>;
pub type RenamedMOp<SrcRegWidth: Size> =
UnitMOp<(), SrcRegWidth, L2RegisterFileMOp<(), SrcRegWidth>>;

View file

@ -0,0 +1,267 @@
// SPDX-License-Identifier: LGPL-3.0-or-later
// See Notices.txt for copyright information
use crate::{
config::CpuConfig,
instruction::{MOp, MOpDestReg, MOpRegNum, MOpTrait, MoveRegMOp, PRegNum, RenamedMOp},
rename_table::{rename_table, RenameTablePortConfig},
unit::{RenamedInsnData, RetireQueueIndex, UnitMOp},
util::array_vec::{ArrayVec, Length, ReadyValidArray},
};
use fayalite::{
prelude::*,
util::{prefix_sum::PrefixSumAlgorithm, ready_valid::ReadyValid},
};
/// one instruction as fed into [`instruction_rename`], before its registers are renamed
#[hdl]
pub struct InstructionRenameInputInsn<UnitNumWidth: Size, OutRegNumWidth: Size> {
/// the operation to rename; its source registers are looked up in the rename table
/// and its destination registers are written to the rename table
pub mop: MOp,
/// copied unchanged into the `pc` field of the output [`RenamedInsnData`]
/// (presumably the instruction's program counter -- confirm against the fetch stage)
pub pc: UInt<64>,
/// physical register that [`instruction_rename`] writes into the rename table for each of
/// `mop`'s destination registers and copies into the output's `dest` field
pub renamed_dest: PRegNum<UnitNumWidth, OutRegNumWidth>,
}
impl CpuConfig {
    /// the [`InstructionRenameInputInsn`] type sized by this config's
    /// unit number width and output register number width
    pub fn instruction_rename_input_insn(&self) -> InstructionRenameInputInsn<DynSize, DynSize> {
        let unit_num_width = self.unit_num_width();
        let out_reg_num_width = self.out_reg_num_width;
        InstructionRenameInputInsn[unit_num_width][out_reg_num_width]
    }
}
/// summary of a contiguous run of fetch slots, combined by the prefix-sum in
/// [`instruction_rename`]
#[hdl]
struct InsnsInPrefixSummary<FetchWidth: Size> {
/// true when every slot in the run is ready: for a single slot that is the output's `ready`,
/// or forced true when the slot holds a `TransformedMove`; for merged runs it is the AND of
/// both halves
all_ready: Bool,
/// count of ready slots from the start of the run; when merging, the right half's count is
/// only added if the whole left half is ready
ready_count: Length<FetchWidth>,
/// like `ready_count`, but a slot with no input instruction contributes zero
retire_queue_used: Length<FetchWidth>,
}
/// instruction rename stage.
///
/// takes up to `config.fetch_width` instructions per cycle on `insns_in`, looks up their source
/// registers in a [`rename_table`] instance, writes their destination registers into that table,
/// assigns each a retire queue index counted up from `start_retire_queue_index`, and emits the
/// renamed instructions on `insns_out`. `TransformedMove` instructions are handled by writing
/// their first source's physical register into the rename table and emitting nothing.
#[hdl_module]
pub fn instruction_rename(config: &CpuConfig) {
// clock domain, forwarded to the rename table instance below
#[hdl]
let cd: ClockDomain = m.input();
// input instructions; `insns_in.ready` is driven below from the prefix-sum's ready count
#[hdl]
let insns_in: ReadyValidArray<InstructionRenameInputInsn<DynSize, DynSize>, DynSize> =
m.input(ReadyValidArray[config.instruction_rename_input_insn()][config.fetch_width.get()]);
// retire queue index given to the first slot
#[hdl]
let start_retire_queue_index: RetireQueueIndex<DynSize> = m.input(config.retire_queue_index());
// `start_retire_queue_index` advanced by the last prefix summary's `retire_queue_used`
#[hdl]
let end_retire_queue_index: RetireQueueIndex<DynSize> = m.output(config.retire_queue_index());
// one ready/valid output per fetch slot
#[hdl]
let insns_out: Array<
ReadyValid<RenamedInsnData<RenamedMOp<DynSize>, PRegNum<DynSize, DynSize>, DynSize>>,
> = m.output(
Array[ReadyValid[config.renamed_insn_data(config.renamed_mop(), config.p_reg_num())]]
[config.fetch_width.get()],
);
// TODO: handle resetting table after cancelling instructions
// per-slot flag: defaults to the output's `ready`, overridden to true for `TransformedMove`.
// NOTE(review): this wire is not read anywhere else in this function -- the prefix-sum below
// recomputes the same condition from `insns_out` directly; confirm whether it is still needed.
#[hdl]
let insns_ready_or_move = wire(Array[Bool][config.fetch_width.get()]);
for (insn_ready_or_move, insn_out) in insns_ready_or_move.into_iter().zip(insns_out) {
connect(insn_ready_or_move, insn_out.ready);
}
ArrayVec::for_each(insns_in.data, |fetch_index, input_insn| {
#[hdl]
match input_insn.mop {
UnitMOp::<_, _, _>::TransformedMove(_) => {
connect(insns_ready_or_move[fetch_index], true);
}
UnitMOp::<_, _, _>::AluBranch(_) | UnitMOp::<_, _, _>::LoadStore(_) => {}
}
});
let insns_in_prefix_summary_ty = InsnsInPrefixSummary[config.fetch_width.get()];
#[hdl]
let insns_in_prefix_summaries =
wire(Array[insns_in_prefix_summary_ty][config.fetch_width.get()]);
// prefix-sum over the fetch slots: entry `i` summarizes slots `0..=i`
let insns_in_prefix_summaries_vec = PrefixSumAlgorithm::WorkEfficient.run(
// leaf summary for a single slot
(0..config.fetch_width.get()).map(|fetch_index| {
#[hdl]
let insns_in_prefix_summary_in = wire(insns_in_prefix_summary_ty);
#[hdl]
let InsnsInPrefixSummary::<_> {
all_ready,
ready_count,
retire_queue_used,
} = insns_in_prefix_summary_in;
// default: the slot is ready when its output is ready
connect(all_ready, insns_out[fetch_index].ready);
// 1 when ready, 0 otherwise; this reads the wire `all_ready`, so it also reflects the
// override for moves connected further down
connect(
ready_count,
Expr::ty(ready_count).cast_from_uint_unchecked(all_ready.cast_to(UInt[1])),
);
// an empty slot uses no retire queue entries
connect(retire_queue_used, Expr::ty(retire_queue_used).zero());
#[hdl]
if let HdlSome(input_insn) = ArrayVec::get(insns_in.data, fetch_index) {
connect(retire_queue_used, ready_count);
#[hdl]
match input_insn.mop {
UnitMOp::<_, _, _>::TransformedMove(_) => {
// moves produce no output instruction, so they don't wait on the output's `ready`
connect(all_ready, true);
}
UnitMOp::<_, _, _>::AluBranch(_) | UnitMOp::<_, _, _>::LoadStore(_) => {}
}
}
insns_in_prefix_summary_in
}),
// merge the summary `l` of an earlier run with the summary `r` of the run right after it
|l, r| {
#[hdl]
let insns_in_prefix_summary_merge = wire(insns_in_prefix_summary_ty);
#[hdl]
let InsnsInPrefixSummary::<_> {
all_ready,
ready_count,
retire_queue_used,
} = insns_in_prefix_summary_merge;
connect(all_ready, l.all_ready & r.all_ready);
#[hdl]
if l.all_ready {
// the whole left run is ready, so counting continues into the right run
connect(
ready_count,
Expr::ty(ready_count).cast_from_uint_unchecked(
Length::as_uint(l.ready_count) + Length::as_uint(r.ready_count),
),
);
connect(
retire_queue_used,
Expr::ty(retire_queue_used).cast_from_uint_unchecked(
Length::as_uint(l.retire_queue_used) + Length::as_uint(r.retire_queue_used),
),
);
} else {
// counting stopped somewhere in the left run, so the right run is ignored
connect(ready_count, l.ready_count);
connect(retire_queue_used, l.retire_queue_used);
}
insns_in_prefix_summary_merge
},
);
for (l, r) in insns_in_prefix_summaries
.into_iter()
.zip(insns_in_prefix_summaries_vec)
{
connect(l, r);
}
// the summary covering all slots gives the count reported back on `insns_in.ready`
connect(
insns_in.ready,
insns_in_prefix_summaries[config.fetch_width.get() - 1].ready_count,
);
// `retire_queue_indexes[i]` is the index given to slot `i`; the extra last entry is the
// index after all slots, which becomes `end_retire_queue_index`
#[hdl]
let retire_queue_indexes =
wire(Array[config.retire_queue_index()][config.fetch_width.get() + 1]);
connect(retire_queue_indexes[0], start_retire_queue_index);
connect(
end_retire_queue_index,
retire_queue_indexes[config.fetch_width.get()],
);
for (retire_queue_index, insns_in_prefix_summary) in retire_queue_indexes
.into_iter()
.skip(1)
.zip(insns_in_prefix_summaries)
{
// entry `i + 1` = start index + retire queue entries used by slots `0..=i`
connect_any(
retire_queue_index.index,
start_retire_queue_index.index
+ Length::as_uint(insns_in_prefix_summary.retire_queue_used),
);
}
let mut port_configs = Vec::new();
// number of source registers a `MOp` can have: one more than the largest source index seen
let mut src_reg_count = 0;
MOpTrait::for_each_src_reg(MOp.uninit(), &mut |_, src_index| {
src_reg_count = src_reg_count.max(src_index + 1);
});
// per fetch slot: first all source read ports, then one write port per destination register
// kind. `rename_table` applies ports in this order, so a slot's reads see the writes of
// earlier slots in the same cycle but not its own writes.
for _ in 0..config.fetch_width.get() {
for _ in 0..src_reg_count {
port_configs.push(RenameTablePortConfig::Read {
addr_range: MOpRegNum::NON_CONST_REG_NUMS,
});
}
for dest_reg_kind in MOpDestReg::REG_KINDS {
port_configs.push(RenameTablePortConfig::Write {
addr_range: dest_reg_kind.reg_num_range(),
});
}
}
#[hdl]
let rename_table = instance(rename_table(config, &port_configs));
connect(rename_table.cd, cd);
// defaults for ports that no instruction drives this cycle: address 0 and, for writes,
// the constant-zero physical register as data
for read_port in rename_table.read_ports {
connect_any(read_port.addr, 0_hdl_u0);
}
for write_port in rename_table.write_ports {
connect_any(write_port.addr, 0_hdl_u0);
connect_any(write_port.data, config.p_reg_num().const_zero());
}
ArrayVec::for_each(
ReadyValidArray::firing_data(insns_in),
|fetch_index, input_insn| {
// first rename table read/write port belonging to this fetch slot
let read_port_index = fetch_index * src_reg_count;
let write_port_index = fetch_index * MOpDestReg::REG_COUNT;
#[hdl]
let InstructionRenameInputInsn::<_, _> {
mop,
pc,
renamed_dest,
} = input_insn;
// rename sources: drive each source register number into a read port and use the
// port's returned physical register (as bits) as the renamed source
let insn_out = MOpTrait::map_regs(
mop,
(),
config.p_reg_num_width(),
&mut |src_reg, src_index| {
connect(
rename_table.read_ports[read_port_index + src_index].addr,
src_reg.cast_bits_to(MOpRegNum),
);
rename_table.read_ports[read_port_index + src_index]
.data
.cast_to_bits()
},
);
// rename destinations: map every destination register to `renamed_dest`
for (i, dest_reg) in MOpDestReg::regs(MOpTrait::dest_reg(mop))
.into_iter()
.enumerate()
{
connect(
rename_table.write_ports[write_port_index + i].addr,
dest_reg,
);
connect(
rename_table.write_ports[write_port_index + i].data,
renamed_dest,
);
}
let insn_out = UnitMOp::try_with_transformed_move_op(
insn_out,
config.renamed_mop().TransformedMove,
|insn_out: Expr<HdlOption<_>>, move_reg: Expr<MoveRegMOp<_, _>>| {
// these connects come after the ones above, so they override the write data
for i in 0..MOpDestReg::REG_COUNT {
// execute move by using same PRegNum as src[0] for dest
connect(
rename_table.write_ports[write_port_index + i].data,
move_reg.common.src[0].cast_bits_to(config.p_reg_num()),
);
}
// move already executed, so remove it
connect(insn_out, Expr::ty(insn_out).HdlNone());
},
);
// emit the renamed instruction (nothing for an executed move) on this slot's output
connect(
insns_out[fetch_index].data,
HdlOption::map(insn_out, |insn_out| {
#[hdl]
RenamedInsnData::<_, _, _> {
retire_queue_index: retire_queue_indexes[fetch_index],
pc,
dest: renamed_dest,
mop: insn_out,
}
}),
);
},
);
}

View file

@ -2,7 +2,10 @@
// See Notices.txt for copyright information
pub mod config;
pub mod instruction;
pub mod instruction_rename;
pub mod reg_alloc;
pub mod register;
pub mod rename_table;
pub mod retire_queue;
pub mod unit;
pub mod util;

View file

@ -3,17 +3,17 @@
use crate::{
config::CpuConfig,
instruction::{
MOp, MOpDestReg, MOpRegNum, MOpTrait, MoveRegMOp, PRegNum, RenameTableName, UnitOutRegNum,
MOp, MOpDestReg, MOpRegNum, MOpTrait, MoveRegMOp, PRegNum, UnitOutRegNum,
COMMON_MOP_SRC_LEN,
},
unit::{
unit_base::{UnitForwardingInfo, UnitInput},
GlobalState, TrapData, UnitMOp, UnitOutput, UnitOutputWrite, UnitResult,
UnitResultCompleted, UnitTrait,
},
util::tree_reduce::tree_reduce_with_state,
util::array_vec::ReadyValidArray,
};
use fayalite::{
int::BoolOrIntType,
memory::{splat_mask, WriteStruct},
module::{instance_with_loc, memory_with_loc, wire_with_loc},
prelude::*,
@ -44,150 +44,12 @@ pub enum FetchDecodeSpecialOp {
#[hdl]
pub struct FetchDecodeInterface<FetchWidth: Size> {
pub decoded_insns: ArrayType<ReadyValid<FetchedDecodedMOp>, FetchWidth>,
pub decoded_insns: ReadyValidArray<FetchedDecodedMOp, FetchWidth>,
#[hdl(flip)]
pub fetch_decode_special_op: ReadyValid<FetchDecodeSpecialOp>,
}
#[hdl]
struct ROBRenamedInsn<UnitNumWidth: Size, OutRegNumWidth: Size> {
mop_dest: MOpDestReg,
p_dest: PRegNum<UnitNumWidth, OutRegNumWidth>,
}
#[hdl]
struct ROBEntry<UnitNumWidth: Size, OutRegNumWidth: Size> {
renamed_insn: ROBRenamedInsn<UnitNumWidth, OutRegNumWidth>,
dest_written: Bool,
}
#[hdl_module]
fn rob(config: &CpuConfig) {
#[hdl]
let cd: ClockDomain = m.input();
#[hdl]
let renamed_insns_in: Array<ReadyValid<ROBRenamedInsn<DynSize, DynSize>>> = m.input(
Array[ReadyValid[ROBRenamedInsn[config.unit_num_width()][config.out_reg_num_width]]]
[config.fetch_width.get()],
);
#[hdl]
let unit_forwarding_info: UnitForwardingInfo<DynSize, DynSize, DynSize> =
m.input(config.unit_forwarding_info());
let rob_entry_ty = ROBEntry[config.unit_num_width()][config.out_reg_num_width];
#[hdl]
let rob = reg_builder()
.clock_domain(cd)
.no_reset(Array[rob_entry_ty][config.rob_size.get()]);
#[hdl]
let rob_valid_start = reg_builder()
.clock_domain(cd)
.reset(UInt::range(0..config.rob_size.get()).zero());
#[hdl]
let rob_valid_end = reg_builder()
.clock_domain(cd)
.reset(UInt::range(0..config.rob_size.get()).zero());
#[hdl]
let free_space = wire(UInt::range_inclusive(0..=config.rob_size.get()));
#[hdl]
if rob_valid_end.cmp_lt(rob_valid_start) {
// rob_valid_end wrapped around but start didn't
connect_any(
free_space,
rob_valid_end + config.rob_size.get() - rob_valid_start,
);
} else {
connect_any(free_space, rob_valid_end - rob_valid_start);
}
struct IndexAndRange {
index: Expr<UInt>,
range: std::ops::Range<usize>,
}
let mut next_write_index = IndexAndRange {
index: rob_valid_end,
range: 0..config.rob_size.get(),
};
for fetch_index in 0..config.fetch_width.get() {
let write_index = next_write_index;
let next_write_index_range = write_index.range.start..write_index.range.end + 1;
next_write_index = IndexAndRange {
index: wire_with_loc(
&format!("next_write_index_{fetch_index}"),
SourceLocation::caller(),
UInt::range(next_write_index_range.clone()),
),
range: next_write_index_range,
};
connect(
renamed_insns_in[fetch_index].ready,
fetch_index.cmp_lt(free_space),
);
#[hdl]
if let HdlSome(renamed_insn) = ReadyValid::firing_data(renamed_insns_in[fetch_index]) {
for i in write_index.range.clone() {
#[hdl]
if write_index.index.cmp_eq(i) {
connect(
rob[i % config.rob_size.get()],
#[hdl]
ROBEntry {
renamed_insn,
dest_written: false,
},
);
}
}
}
// TODO: optimize write_index chain better
connect_any(
next_write_index.index,
write_index.index
+ ReadyValid::firing(renamed_insns_in[fetch_index]).cast_to_static::<UInt<1>>(),
);
}
assert!(
config.rob_size >= config.fetch_width,
"rob_size ({}) is too small for fetch_width = {} -- next_write_index would overflow",
config.rob_size,
config.fetch_width,
);
#[hdl]
if next_write_index.index.cmp_lt(config.rob_size.get()) {
connect_any(rob_valid_end, next_write_index.index);
} else {
connect_any(
rob_valid_end,
next_write_index.index - config.rob_size.get(),
);
}
// TODO: optimize better, O(rob_size * unit_count) is too big here
for rob_index in 0..config.rob_size.get() {
for unit_index in 0..config.non_const_unit_nums().len() {
#[hdl]
if let HdlSome(unit_output_write) = unit_forwarding_info.unit_output_writes[unit_index]
{
#[hdl]
let UnitOutputWrite::<_> {
which: unit_out_reg,
value: _,
} = unit_output_write;
let p_reg_num = #[hdl]
PRegNum::<_, _> {
unit_num: config.unit_num().from_index(unit_index),
unit_out_reg,
};
#[hdl]
if rob[rob_index].renamed_insn.p_dest.cmp_eq(p_reg_num) {
connect(rob[rob_index].dest_written, true);
}
}
}
}
}
#[cfg(todo)]
#[hdl_module]
/// combination register allocator, register renaming, unit selection, and retire handling
pub fn reg_alloc(config: &CpuConfig) {
@ -205,10 +67,6 @@ pub fn reg_alloc(config: &CpuConfig) {
);
// TODO: finish
#[hdl]
let rob = instance(rob(config));
connect(rob.cd, cd);
let mut rename_table_mems = BTreeMap::<RenameTableName, MemBuilder<_>>::new();
for reg_kind in MOpDestReg::REG_KINDS {
@ -238,11 +96,6 @@ pub fn reg_alloc(config: &CpuConfig) {
#[hdl]
let renamed_mops_out_reg = wire(Array[HdlOption[config.p_reg_num()]][config.fetch_width.get()]);
for fetch_index in 0..config.fetch_width.get() {
// TODO: finish
connect(
rob.renamed_insns_in[fetch_index].data,
Expr::ty(rob).renamed_insns_in.element().data.HdlNone(),
);
// TODO: finish
connect(
fetch_decode_interface.decoded_insns[fetch_index].ready,
@ -483,7 +336,6 @@ pub fn reg_alloc(config: &CpuConfig) {
);
#[hdl]
let unit_forwarding_info = wire(config.unit_forwarding_info());
connect(rob.unit_forwarding_info, unit_forwarding_info);
for (unit_index, unit_config) in config.units.iter().enumerate() {
let dyn_unit = unit_config.kind.unit(config, unit_index);
let unit = instance_with_loc(

View file

@ -1,7 +1,10 @@
// SPDX-License-Identifier: LGPL-3.0-or-later
// See Notices.txt for copyright information
use crate::util::tree_reduce::tree_reduce;
use fayalite::{module::wire_with_loc, prelude::*, util::ready_valid::ReadyValid};
use fayalite::{
module::wire_with_loc,
prelude::*,
util::{prefix_sum::reduce, ready_valid::ReadyValid},
};
use std::{num::NonZeroUsize, ops::Range};
#[hdl_module]
@ -44,7 +47,7 @@ pub fn unit_free_regs_tracker(
count,
count_overflowed,
alloc_nums,
}) = tree_reduce(
}) = reduce(
(0..reg_count).map(|index| Summary {
range: index..index + 1,
count: (!allocated_reg[index])

View file

@ -0,0 +1,187 @@
// SPDX-License-Identifier: LGPL-3.0-or-later
// See Notices.txt for copyright information
use crate::{
config::CpuConfig,
instruction::{MOpRegNum, PRegNum},
util::range_intersection,
};
use fayalite::{
memory::{splat_mask, ReadStruct, WriteStruct},
module::memory_with_loc,
prelude::*,
};
use std::{mem, ops::Range};
/// one read port of [`rename_table`]
#[hdl]
pub struct RenameTableReadPort<UnitNumWidth: Size, OutRegNumWidth: Size> {
/// register number to look up
pub addr: MOpRegNum,
/// lookup result, driven by the rename table (hence flipped); it is the constant-zero
/// physical register unless a memory covering `addr` or an earlier write port supplies a value
#[hdl(flip)]
pub data: PRegNum<UnitNumWidth, OutRegNumWidth>,
}
/// one write port of [`rename_table`]; there is no separate enable, a memory is written
/// whenever `addr` falls inside the address range that memory covers
#[hdl]
pub struct RenameTableWritePort<UnitNumWidth: Size, OutRegNumWidth: Size> {
/// register number to write
pub addr: MOpRegNum,
/// physical register to store for `addr`
pub data: PRegNum<UnitNumWidth, OutRegNumWidth>,
}
/// build-time description of one [`rename_table`] port.
///
/// `addr_range` is the range of register numbers the port is wired up for: the port is only
/// attached to the internal memories that overlap this range.
#[derive(Clone, Debug)]
pub enum RenameTablePortConfig {
/// a read port; becomes the next entry of `read_ports`
Read { addr_range: Range<u32> },
/// a write port; becomes the next entry of `write_ports`
Write { addr_range: Range<u32> },
}
/// register rename table.
/// all read/write operations are done in the order of `port_configs`.
/// So if `port_configs[0]` is a write and `port_configs[1]` is a read,
/// then the read port will combinatorially return data written by the
/// write port in the *same* clock cycle. However, if `port_configs[0]`
/// is a read and `port_configs[1]` is a write, then the read port will
/// not see the data written by the write port until the *next* clock cycle.
///
/// The module has a `cd` clock domain input, a `read_ports` array with one entry per
/// [`RenameTablePortConfig::Read`] and a `write_ports` array with one entry per
/// [`RenameTablePortConfig::Write`], each in the order they appear in `port_configs`.
///
/// Internally the address space is cut at every start/end of any port's `addr_range`, and one
/// memory is created per resulting piece, so each memory only gets the ports whose range
/// overlaps it.
#[hdl_module]
pub fn rename_table(config: &CpuConfig, port_configs: &[RenameTablePortConfig]) {
    let read_count = port_configs
        .iter()
        .filter(|v| matches!(v, RenameTablePortConfig::Read { .. }))
        .count();
    let write_count = port_configs
        .iter()
        .filter(|v| matches!(v, RenameTablePortConfig::Write { .. }))
        .count();

    #[hdl]
    let cd: ClockDomain = m.input();
    #[hdl]
    let read_ports: Array<RenameTableReadPort<DynSize, DynSize>> = m.input(
        Array[RenameTableReadPort[config.unit_num_width()][config.out_reg_num_width]][read_count],
    );
    #[hdl]
    let write_ports: Array<RenameTableWritePort<DynSize, DynSize>> = m.input(
        Array[RenameTableWritePort[config.unit_num_width()][config.out_reg_num_width]][write_count],
    );

    // default read result; overridden below by memory reads and by write forwarding
    for read_port in read_ports {
        connect(read_port.data, config.p_reg_num().const_zero());
    }

    // pairs each port config with its index into `read_ports` or `write_ports`
    // (reads and writes are numbered independently, in `port_configs` order)
    let port_configs_and_indexes = port_configs.iter().scan(
        (0usize, 0),
        |(read_port_index, write_port_index), port_config| {
            Some((
                port_config,
                match port_config {
                    RenameTablePortConfig::Read { .. } => {
                        mem::replace(read_port_index, *read_port_index + 1)
                    }
                    RenameTablePortConfig::Write { .. } => {
                        mem::replace(write_port_index, *write_port_index + 1)
                    }
                },
            ))
        },
    );

    // every address where some port's range starts or ends, sorted and deduplicated;
    // each pair of adjacent transitions delimits one memory
    let mut range_transitions = Vec::with_capacity(port_configs.len() * 2);
    for port_config in port_configs {
        let (RenameTablePortConfig::Read { addr_range }
        | RenameTablePortConfig::Write { addr_range }) = port_config;
        range_transitions.push(addr_range.start);
        range_transitions.push(addr_range.end);
    }
    range_transitions.sort_unstable();
    range_transitions.dedup();

    let mut last_range_transition = None;
    for range_transition in range_transitions {
        // the first transition only records the start of the first piece
        let Some(last_range_transition) = last_range_transition.replace(range_transition) else {
            continue;
        };
        let cur_addr_range = last_range_transition..range_transition;
        let mut mem = memory_with_loc(
            &if cur_addr_range.len() == 1 {
                format!("mem_{:#x}", cur_addr_range.start)
            } else {
                format!("mem_{:#x}_{:#x}", cur_addr_range.start, cur_addr_range.end)
            },
            config.p_reg_num(),
            SourceLocation::caller(),
        );
        mem.depth(cur_addr_range.len());
        // whether a runtime address selects this memory
        let addr_in_range = |addr: Expr<MOpRegNum>| {
            if cur_addr_range.len() == 1 {
                addr.value.cmp_eq(cur_addr_range.start)
            } else {
                addr.value.cmp_ge(cur_addr_range.start) & addr.value.cmp_lt(cur_addr_range.end)
            }
        };
        for (port_config, port_index) in port_configs_and_indexes.clone() {
            match port_config {
                RenameTablePortConfig::Read { addr_range } => {
                    // `addr_range` is already a reference, so it is passed as-is
                    if range_intersection(addr_range, &cur_addr_range).is_none() {
                        continue;
                    }
                    let port = read_ports[port_index];
                    #[hdl]
                    let ReadStruct::<_, _> {
                        addr,
                        en,
                        clk,
                        data,
                    } = mem.new_read_port();
                    // memory addresses are relative to the start of this piece
                    connect_any(addr, port.addr.value - cur_addr_range.start);
                    connect(en, addr_in_range(port.addr));
                    connect(clk, cd.clk);
                    // only the memory selected by the address supplies the read data
                    #[hdl]
                    if en {
                        connect(port.data, data);
                    }
                }
                RenameTablePortConfig::Write { addr_range } => {
                    // `addr_range` is already a reference, so it is passed as-is
                    if range_intersection(addr_range, &cur_addr_range).is_none() {
                        continue;
                    }
                    let port = write_ports[port_index];
                    #[hdl]
                    let WriteStruct::<_, _> {
                        addr,
                        en,
                        clk,
                        data,
                        mask,
                    } = mem.new_write_port();
                    // memory addresses are relative to the start of this piece
                    connect_any(addr, port.addr.value - cur_addr_range.start);
                    connect(en, addr_in_range(port.addr));
                    connect(clk, cd.clk);
                    connect(data, port.data);
                    connect(mask, splat_mask(Expr::ty(port).data, true.to_expr()));
                }
            }
        }
    }

    // write-to-read forwarding: a read port returns the data of any write port that comes
    // before it in `port_configs` and has the same address. these connects come after the
    // memory reads above and are made in `port_configs` order, so the closest preceding
    // matching write wins.
    for (port_config_index, (port_config, port_index)) in
        port_configs_and_indexes.clone().enumerate()
    {
        let RenameTablePortConfig::Read { addr_range } = port_config else {
            continue;
        };
        let port = read_ports[port_index];
        for (prev_port_config, prev_port_index) in
            port_configs_and_indexes.clone().take(port_config_index)
        {
            let RenameTablePortConfig::Write {
                addr_range: prev_addr_range,
            } = prev_port_config
            else {
                continue;
            };
            // ports whose ranges can't overlap never need forwarding logic
            if range_intersection(addr_range, prev_addr_range).is_none() {
                continue;
            }
            let prev_port = write_ports[prev_port_index];
            #[hdl]
            if prev_port.addr.cmp_eq(port.addr) {
                connect(port.data, prev_port.data);
            }
        }
    }
}

View file

@ -0,0 +1,12 @@
// SPDX-License-Identifier: LGPL-3.0-or-later
// See Notices.txt for copyright information
use crate::config::CpuConfig;
use fayalite::prelude::*;
/// retire queue module -- not implemented yet.
///
/// so far it only declares the `cd` clock domain input; the body then hits `todo!()`.
#[hdl_module]
pub fn retire_queue(config: &CpuConfig) {
#[hdl]
let cd: ClockDomain = m.input();
// placeholder: the rest of the module is still to be written
todo!();
}

View file

@ -8,13 +8,15 @@ use crate::{
RenamedMOp, UnitOutRegNum,
},
register::{FlagsMode, PRegValue},
unit::unit_base::UnitToRegAlloc,
};
use fayalite::{
bundle::{Bundle, BundleType},
int::BoolOrIntType,
intern::{Intern, Interned},
prelude::*,
util::ready_valid::ReadyValid,
};
use std::marker::PhantomData;
pub mod alu_branch;
pub mod unit_base;
@ -204,23 +206,28 @@ macro_rules! all_units {
})*
};
$(impl<$DestReg: Type, $SrcRegWidth: Size> MOpInto<RenamedMOp<$DestReg, $SrcRegWidth>> for $BeforeOp {
fn mop_into_ty(self) -> RenamedMOp<$DestReg, $SrcRegWidth> {
RenamedMOp[MOpTrait::dest_reg_ty(self)][MOpTrait::src_reg_width(self)]
}
fn mop_into(this: Expr<Self>) -> Expr<RenamedMOp<$DestReg, $SrcRegWidth>> {
MOpInto::<RenamedMOp<$DestReg, $SrcRegWidth>>::mop_into_ty(Expr::ty(this)).$BeforeUnit(this)
}
})*
const _: () = {
#[hdl]
type $DestReg = ();
$(impl<$DestReg: Type, $SrcRegWidth: Size> MOpInto<RenamedMOp<$DestReg, $SrcRegWidth>> for $AfterOp {
fn mop_into_ty(self) -> RenamedMOp<$DestReg, $SrcRegWidth> {
RenamedMOp[MOpTrait::dest_reg_ty(self)][MOpTrait::src_reg_width(self)]
}
fn mop_into(this: Expr<Self>) -> Expr<RenamedMOp<$DestReg, $SrcRegWidth>> {
MOpInto::<RenamedMOp<$DestReg, $SrcRegWidth>>::mop_into_ty(Expr::ty(this)).$AfterUnit(this)
}
})*
$(impl<$SrcRegWidth: Size> MOpInto<RenamedMOp<$SrcRegWidth>> for $BeforeOp {
fn mop_into_ty(self) -> RenamedMOp<$SrcRegWidth> {
RenamedMOp[MOpTrait::src_reg_width(self)]
}
fn mop_into(this: Expr<Self>) -> Expr<RenamedMOp<$SrcRegWidth>> {
MOpInto::<RenamedMOp<$SrcRegWidth>>::mop_into_ty(Expr::ty(this)).$BeforeUnit(this)
}
})*
$(impl<$SrcRegWidth: Size> MOpInto<RenamedMOp<$SrcRegWidth>> for $AfterOp {
fn mop_into_ty(self) -> RenamedMOp<$SrcRegWidth> {
RenamedMOp[MOpTrait::src_reg_width(self)]
}
fn mop_into(this: Expr<Self>) -> Expr<RenamedMOp<$SrcRegWidth>> {
MOpInto::<RenamedMOp<$SrcRegWidth>>::mop_into_ty(Expr::ty(this)).$AfterUnit(this)
}
})*
};
};
}
@ -253,6 +260,80 @@ pub struct GlobalState {
pub flags_mode: FlagsMode,
}
/// index into the retire queue (the queue of instructions that haven't yet retired)
#[hdl(cmp_eq)]
pub struct RetireQueueIndex<Width: Size> {
/// increases by one for each instruction added to the retire queue.
///
/// this wraps around, so you must not compare it using `cmp_lt`/`cmp_gt`
/// but instead must use [`Self::insns_until`] and compare the output with zero.
///
/// [`Self::insns_until`] returns the difference of two indexes reinterpreted as a signed
/// integer of this same width.
pub index: UIntType<Width>,
}
impl<Width: Size> RetireQueueIndex<Width> {
    /// returns `this.index - target.index` as a signed integer with the same width as `index`.
    ///
    /// panics if `this` and `target` don't have the same type.
    pub fn insns_until(
        this: impl ToExpr<Type = Self>,
        target: impl ToExpr<Type = Self>,
    ) -> Expr<SIntType<Width>> {
        let (this, target) = (this.to_expr(), target.to_expr());
        let ty = Expr::ty(this);
        assert_eq!(ty, Expr::ty(target));
        let difference = this.index - target.index;
        difference.cast_to(ty.index.as_same_width_sint())
    }
}
/// an instruction bundled with the data attached to it by renaming
#[hdl]
pub struct RenamedInsnData<MOp, DestReg, RetireQueueIndexWidth: Size> {
/// the instruction's [`RetireQueueIndex`]
pub retire_queue_index: RetireQueueIndex<RetireQueueIndexWidth>,
/// 64-bit value carried along with the instruction
/// (presumably its program counter -- confirm against the producer)
pub pc: UInt<64>,
/// the destination register, in whatever representation `DestReg` is
pub dest: DestReg,
/// the operation itself
pub mop: MOp,
}
/// per-unit information passed to units through [`UnitToRegAlloc::unit_forwarding_info`]
#[hdl]
pub struct UnitForwardingInfo<UnitNumWidth: Size, OutRegNumWidth: Size, UnitCount: Size> {
/// `UnitCount` entries, each an optional [`UnitOutputWrite`]
/// (presumably entry `i` is what unit `i` writes this cycle -- confirm against the driver)
pub unit_output_writes: ArrayType<HdlOption<UnitOutputWrite<OutRegNumWidth>>, UnitCount>,
/// `UnitCount` entries, each an optional [`UnitOutRegNum`]
/// (presumably the output register unit `i` frees this cycle -- confirm against the driver)
pub unit_reg_frees: ArrayType<HdlOption<UnitOutRegNum<OutRegNumWidth>>, UnitCount>,
/// carries no data; only keeps the `UnitNumWidth` parameter in use
pub _phantom: PhantomData<UnitNumWidth>,
}
/// interface bundle of a unit, as returned by `UnitTrait::unit_to_reg_alloc`.
///
/// the fields marked `#[hdl(flip)]` run in the opposite direction to `output`
/// (presumably they are inputs to the unit and `output` is driven by it -- confirm against
/// the unit implementations).
#[hdl]
pub struct UnitToRegAlloc<
MOp: Type,
ExtraOut: Type,
UnitNumWidth: Size,
OutRegNumWidth: Size,
UnitCount: Size,
RetireQueueIndexWidth: Size,
> {
/// forwarding information for all units
#[hdl(flip)]
pub unit_forwarding_info: UnitForwardingInfo<UnitNumWidth, OutRegNumWidth, UnitCount>,
/// ready/valid stream of renamed instructions, whose destination is a
/// [`UnitOutRegNum`] rather than a full physical register number
#[hdl(flip)]
pub input:
ReadyValid<RenamedInsnData<MOp, UnitOutRegNum<OutRegNumWidth>, RetireQueueIndexWidth>>,
/// optional cancel request
#[hdl(flip)]
pub cancel_input: HdlOption<UnitCancelInput<OutRegNumWidth>>,
/// optional [`UnitOutput`]
pub output: HdlOption<UnitOutput<OutRegNumWidth, RetireQueueIndexWidth, ExtraOut>>,
}
impl<
        MOp: Type,
        ExtraOut: Type,
        UnitNumWidth: Size,
        OutRegNumWidth: Size,
        UnitCount: Size,
        RetireQueueIndexWidth: Size,
    >
    UnitToRegAlloc<MOp, ExtraOut, UnitNumWidth, OutRegNumWidth, UnitCount, RetireQueueIndexWidth>
{
    /// the `MOp` type, taken from the `mop` field of the `input` payload type
    pub fn mop_ty(self) -> MOp {
        let insn_data_ty = self.input.data.HdlSome;
        insn_data_ty.mop
    }
    /// the `ExtraOut` type, taken from the `output` payload type
    pub fn extra_out_ty(self) -> ExtraOut {
        let unit_output_ty = self.output.HdlSome;
        unit_output_ty.extra_out_ty()
    }
}
#[hdl(cmp_eq)]
pub struct UnitResultCompleted<ExtraOut> {
pub value: PRegValue,
@ -261,7 +342,7 @@ pub struct UnitResultCompleted<ExtraOut> {
#[hdl(cmp_eq)]
pub struct UnitOutputWrite<OutRegNumWidth: Size> {
pub which: UnitOutRegNum<OutRegNumWidth>,
pub dest: UnitOutRegNum<OutRegNumWidth>,
pub value: PRegValue,
}
@ -283,20 +364,32 @@ impl<ExtraOut: Type> UnitResult<ExtraOut> {
}
#[hdl]
pub struct UnitOutput<OutRegNumWidth: Size, ExtraOut> {
pub which: UnitOutRegNum<OutRegNumWidth>,
pub struct UnitOutput<OutRegNumWidth: Size, RetireQueueIndexWidth: Size, ExtraOut> {
pub dest: UnitOutRegNum<OutRegNumWidth>,
pub retire_queue_index: RetireQueueIndex<RetireQueueIndexWidth>,
pub result: UnitResult<ExtraOut>,
}
impl<OutRegNumWidth: Size, ExtraOut: Type> UnitOutput<OutRegNumWidth, ExtraOut> {
impl<OutRegNumWidth: Size, RetireQueueIndexWidth: Size, ExtraOut: Type>
UnitOutput<OutRegNumWidth, RetireQueueIndexWidth, ExtraOut>
{
pub fn extra_out_ty(self) -> ExtraOut {
self.result.extra_out_ty()
}
}
#[hdl(cmp_eq)]
pub struct UnitCancelInput<OutRegNumWidth: Size> {
pub which: UnitOutRegNum<OutRegNumWidth>,
pub struct UnitCancelInput<RetireQueueIndexWidth: Size> {
pub target: RetireQueueIndex<RetireQueueIndexWidth>,
}
impl<RetireQueueIndexWidth: Size> UnitCancelInput<RetireQueueIndexWidth> {
    /// true when the signed, wrapping difference
    /// `insn_retire_queue_index - this.target` is greater than or equal to zero.
    pub fn is_canceled(
        this: impl ToExpr<Type = Self>,
        insn_retire_queue_index: impl ToExpr<Type = RetireQueueIndex<RetireQueueIndexWidth>>,
    ) -> Expr<Bool> {
        let cancel_target = this.to_expr().target;
        let distance = RetireQueueIndex::insns_until(insn_retire_queue_index, cancel_target);
        distance.cmp_ge(0i8)
    }
}
pub trait UnitTrait:
@ -312,17 +405,14 @@ pub trait UnitTrait:
fn unit_kind(&self) -> UnitKind;
fn extract_mop(
&self,
mop: Expr<RenamedMOp<UnitOutRegNum<DynSize>, DynSize>>,
) -> Expr<HdlOption<Self::MOp>>;
fn extract_mop(&self, mop: Expr<RenamedMOp<DynSize>>) -> Expr<HdlOption<Self::MOp>>;
fn module(&self) -> Interned<Module<Self::Type>>;
fn unit_to_reg_alloc(
&self,
this: Expr<Self::Type>,
) -> Expr<UnitToRegAlloc<Self::MOp, Self::ExtraOut, DynSize, DynSize, DynSize>>;
) -> Expr<UnitToRegAlloc<Self::MOp, Self::ExtraOut, DynSize, DynSize, DynSize, DynSize>>;
fn cd(&self, this: Expr<Self::Type>) -> Expr<ClockDomain>;
@ -370,10 +460,7 @@ impl UnitTrait for DynUnit {
self.unit_kind
}
fn extract_mop(
&self,
mop: Expr<RenamedMOp<UnitOutRegNum<DynSize>, DynSize>>,
) -> Expr<HdlOption<Self::MOp>> {
fn extract_mop(&self, mop: Expr<RenamedMOp<DynSize>>) -> Expr<HdlOption<Self::MOp>> {
self.unit.extract_mop(mop)
}
@ -384,7 +471,7 @@ impl UnitTrait for DynUnit {
fn unit_to_reg_alloc(
&self,
this: Expr<Self::Type>,
) -> Expr<UnitToRegAlloc<Self::MOp, Self::ExtraOut, DynSize, DynSize, DynSize>> {
) -> Expr<UnitToRegAlloc<Self::MOp, Self::ExtraOut, DynSize, DynSize, DynSize, DynSize>> {
self.unit.unit_to_reg_alloc(this)
}
@ -425,10 +512,7 @@ impl<T: UnitTrait + Clone + std::hash::Hash + Eq> UnitTrait for DynUnitWrapper<T
self.0.unit_kind()
}
fn extract_mop(
&self,
mop: Expr<RenamedMOp<UnitOutRegNum<DynSize>, DynSize>>,
) -> Expr<HdlOption<Self::MOp>> {
fn extract_mop(&self, mop: Expr<RenamedMOp<DynSize>>) -> Expr<HdlOption<Self::MOp>> {
Expr::from_enum(Expr::as_enum(self.0.extract_mop(mop)))
}
@ -439,7 +523,7 @@ impl<T: UnitTrait + Clone + std::hash::Hash + Eq> UnitTrait for DynUnitWrapper<T
fn unit_to_reg_alloc(
&self,
this: Expr<Self::Type>,
) -> Expr<UnitToRegAlloc<Self::MOp, Self::ExtraOut, DynSize, DynSize, DynSize>> {
) -> Expr<UnitToRegAlloc<Self::MOp, Self::ExtraOut, DynSize, DynSize, DynSize, DynSize>> {
Expr::from_bundle(Expr::as_bundle(
self.0.unit_to_reg_alloc(Expr::from_bundle(this)),
))

View file

@ -4,14 +4,14 @@
use crate::{
config::CpuConfig,
instruction::{
AddSubMOp, AluBranchMOp, AluCommonMOp, CommonMOp, LogicalMOp, MOpTrait, OutputIntegerMode,
RenamedMOp, UnitOutRegNum, COMMON_MOP_SRC_LEN,
AddSubMOp, AluBranchMOp, AluCommonMOp, CommonMOp, LogicalMOp, OutputIntegerMode,
RenamedMOp, COMMON_MOP_SRC_LEN,
},
register::{FlagsMode, PRegFlagsPowerISA, PRegFlagsX86, PRegValue},
unit::{
unit_base::{unit_base, ExecuteEnd, ExecuteStart, UnitToRegAlloc},
unit_base::{unit_base, ExecuteEnd, ExecuteStart},
DynUnit, DynUnitWrapper, GlobalState, UnitKind, UnitMOp, UnitOutput, UnitResult,
UnitResultCompleted, UnitTrait,
UnitResultCompleted, UnitToRegAlloc, UnitTrait,
},
};
use fayalite::{
@ -24,7 +24,7 @@ use std::{collections::HashMap, ops::RangeTo};
#[hdl]
fn add_sub<SrcCount: KnownSize>(
mop: Expr<AddSubMOp<UnitOutRegNum<DynSize>, DynSize, SrcCount>>,
mop: Expr<AddSubMOp<(), DynSize, SrcCount>>,
pc: Expr<UInt<64>>,
flags_mode: Expr<FlagsMode>,
src_values: Expr<Array<PRegValue, { COMMON_MOP_SRC_LEN }>>,
@ -232,7 +232,7 @@ fn add_sub<SrcCount: KnownSize>(
#[hdl]
fn logical(
mop: Expr<LogicalMOp<UnitOutRegNum<DynSize>, DynSize>>,
mop: Expr<LogicalMOp<(), DynSize>>,
flags_mode: Expr<FlagsMode>,
src_values: Expr<Array<PRegValue, { COMMON_MOP_SRC_LEN }>>,
) -> Expr<UnitResultCompleted<()>> {
@ -250,15 +250,13 @@ pub fn alu_branch(config: &CpuConfig, unit_index: usize) {
let cd: ClockDomain = m.input();
#[hdl]
let unit_to_reg_alloc: UnitToRegAlloc<
AluBranchMOp<UnitOutRegNum<DynSize>, DynSize>,
AluBranchMOp<(), DynSize>,
(),
DynSize,
DynSize,
DynSize,
> = m.output(config.unit_to_reg_alloc(
AluBranchMOp[config.unit_out_reg_num()][config.p_reg_num_width()],
(),
));
DynSize,
> = m.output(config.unit_to_reg_alloc(AluBranchMOp[()][config.p_reg_num_width()], ()));
#[hdl]
let global_state: GlobalState = m.input();
@ -279,24 +277,21 @@ pub fn alu_branch(config: &CpuConfig, unit_index: usize) {
#[hdl]
if let HdlSome(execute_start) = ReadyValid::firing_data(unit_base.execute_start) {
#[hdl]
let ExecuteStart::<_> {
mop,
pc,
src_values,
} = execute_start;
let ExecuteStart::<_, _, _> { insn, src_values } = execute_start;
#[hdl]
match mop {
match insn.mop {
AluBranchMOp::<_, _>::AddSub(mop) => connect(
unit_base.execute_end,
HdlSome(
#[hdl]
ExecuteEnd::<_, _> {
ExecuteEnd::<_, _, _> {
unit_output: #[hdl]
UnitOutput::<_, _> {
which: MOpTrait::dest_reg(mop),
UnitOutput::<_, _, _> {
dest: insn.dest,
retire_queue_index: insn.retire_queue_index,
result: UnitResult[()].Completed(add_sub(
mop,
pc,
insn.pc,
global_state.flags_mode,
src_values,
)),
@ -308,13 +303,14 @@ pub fn alu_branch(config: &CpuConfig, unit_index: usize) {
unit_base.execute_end,
HdlSome(
#[hdl]
ExecuteEnd::<_, _> {
ExecuteEnd::<_, _, _> {
unit_output: #[hdl]
UnitOutput::<_, _> {
which: MOpTrait::dest_reg(mop),
UnitOutput::<_, _, _> {
dest: insn.dest,
retire_queue_index: insn.retire_queue_index,
result: UnitResult[()].Completed(add_sub(
mop,
pc,
insn.pc,
global_state.flags_mode,
src_values,
)),
@ -326,10 +322,11 @@ pub fn alu_branch(config: &CpuConfig, unit_index: usize) {
unit_base.execute_end,
HdlSome(
#[hdl]
ExecuteEnd::<_, _> {
ExecuteEnd::<_, _, _> {
unit_output: #[hdl]
UnitOutput::<_, _> {
which: MOpTrait::dest_reg(mop),
UnitOutput::<_, _, _> {
dest: insn.dest,
retire_queue_index: insn.retire_queue_index,
result: UnitResult[()].Completed(logical(
mop,
global_state.flags_mode,
@ -361,7 +358,7 @@ impl AluBranch {
impl UnitTrait for AluBranch {
type Type = alu_branch;
type ExtraOut = ();
type MOp = AluBranchMOp<UnitOutRegNum<DynSize>, DynSize>;
type MOp = AluBranchMOp<(), DynSize>;
fn ty(&self) -> Self::Type {
self.module.io_ty()
@ -379,10 +376,7 @@ impl UnitTrait for AluBranch {
UnitKind::AluBranch
}
fn extract_mop(
&self,
mop: Expr<RenamedMOp<UnitOutRegNum<DynSize>, DynSize>>,
) -> Expr<HdlOption<Self::MOp>> {
fn extract_mop(&self, mop: Expr<RenamedMOp<DynSize>>) -> Expr<HdlOption<Self::MOp>> {
UnitMOp::alu_branch_mop(mop)
}
@ -393,7 +387,7 @@ impl UnitTrait for AluBranch {
fn unit_to_reg_alloc(
&self,
this: Expr<Self::Type>,
) -> Expr<UnitToRegAlloc<Self::MOp, Self::ExtraOut, DynSize, DynSize, DynSize>> {
) -> Expr<UnitToRegAlloc<Self::MOp, Self::ExtraOut, DynSize, DynSize, DynSize, DynSize>> {
this.unit_to_reg_alloc
}

View file

@ -5,69 +5,28 @@ use crate::{
config::CpuConfig,
instruction::{MOpTrait, PRegNum, UnitNum, UnitOutRegNum, COMMON_MOP_SRC_LEN},
register::PRegValue,
unit::{UnitCancelInput, UnitOutput, UnitOutputWrite},
util::tree_reduce::tree_reduce,
unit::{
RenamedInsnData, UnitCancelInput, UnitForwardingInfo, UnitOutput, UnitOutputWrite,
UnitToRegAlloc,
},
};
use fayalite::{
memory::splat_mask,
module::{memory_with_loc, wire_with_loc},
prelude::*,
ty::StaticType,
util::ready_valid::ReadyValid,
util::{prefix_sum::reduce, ready_valid::ReadyValid},
};
use std::marker::PhantomData;
/// Per-unit forwarding information: one optional entry per unit (`UnitCount` entries) for
/// both register writes and register frees.
#[hdl]
pub struct UnitForwardingInfo<UnitNumWidth: Size, OutRegNumWidth: Size, UnitCount: Size> {
// one optional output-register write per unit; `unit_base` indexes this by `unit_index`
pub unit_output_writes: ArrayType<HdlOption<UnitOutputWrite<OutRegNumWidth>>, UnitCount>,
// one optional output-register number per unit
// NOTE(review): presumably the register being freed by that unit this cycle -- confirm
pub unit_reg_frees: ArrayType<HdlOption<UnitOutRegNum<OutRegNumWidth>>, UnitCount>,
// `UnitNumWidth` is not used by any other field, so it is carried here as a marker only
pub _phantom: PhantomData<UnitNumWidth>,
}
/// Payload carried by [`UnitToRegAlloc::input`]: a micro-op together with a 64-bit `pc`.
#[hdl]
pub struct UnitInput<MOp: Type> {
// the micro-op handed to the unit
pub mop: MOp,
// NOTE(review): presumably the program counter of the instruction `mop` came from -- confirm
pub pc: UInt<64>,
}
/// Interface bundle between one execution unit and the register allocator.
///
/// `unit_base` declares this bundle as a module output, so the fields marked
/// `#[hdl(flip)]` travel in the opposite direction (into the unit), while `output`
/// travels out of the unit.
#[hdl]
pub struct UnitToRegAlloc<
MOp: Type,
ExtraOut: Type,
UnitNumWidth: Size,
OutRegNumWidth: Size,
UnitCount: Size,
> {
// forwarding information for all units (see `UnitForwardingInfo`)
#[hdl(flip)]
pub unit_forwarding_info: UnitForwardingInfo<UnitNumWidth, OutRegNumWidth, UnitCount>,
// ready/valid channel delivering new micro-ops (`UnitInput`) to the unit
#[hdl(flip)]
pub input: ReadyValid<UnitInput<MOp>>,
// optional cancel request for the unit
#[hdl(flip)]
pub cancel_input: HdlOption<UnitCancelInput<OutRegNumWidth>>,
// optional result produced by the unit
pub output: HdlOption<UnitOutput<OutRegNumWidth, ExtraOut>>,
}
impl<MOp: Type, ExtraOut: Type, UnitNumWidth: Size, OutRegNumWidth: Size, UnitCount: Size>
    UnitToRegAlloc<MOp, ExtraOut, UnitNumWidth, OutRegNumWidth, UnitCount>
{
    /// Returns the micro-op type, read from the `HdlSome` payload type of `input.data`.
    pub fn mop_ty(self) -> MOp {
        let input_payload_ty = self.input.data.HdlSome;
        input_payload_ty.mop
    }

    /// Returns the extra-output type, as reported by the `HdlSome` payload type of `output`.
    pub fn extra_out_ty(self) -> ExtraOut {
        let output_payload_ty = self.output.HdlSome;
        output_payload_ty.extra_out_ty()
    }
}
#[hdl]
pub struct ExecuteStart<MOp: Type + MOpTrait<DestReg = UnitOutRegNum<DynSize>>> {
pub mop: MOp,
pub pc: UInt<64>,
pub struct ExecuteStart<MOp: Type, OutRegNumWidth: Size, RetireQueueIndexWidth: Size> {
pub insn: RenamedInsnData<MOp, UnitOutRegNum<OutRegNumWidth>, RetireQueueIndexWidth>,
pub src_values: Array<PRegValue, { COMMON_MOP_SRC_LEN }>,
}
#[hdl]
pub struct ExecuteEnd<OutRegNumWidth: Size, ExtraOut> {
pub unit_output: UnitOutput<OutRegNumWidth, ExtraOut>,
pub struct ExecuteEnd<OutRegNumWidth: Size, RetireQueueIndexWidth: Size, ExtraOut> {
pub unit_output: UnitOutput<OutRegNumWidth, RetireQueueIndexWidth, ExtraOut>,
}
#[hdl]
@ -148,10 +107,9 @@ impl InFlightOpState {
}
#[hdl]
struct InFlightOp<MOp: Type> {
struct InFlightOp<MOp: Type, OutRegNumWidth: Size, RetireQueueIndexWidth: Size> {
state: InFlightOpState,
mop: MOp,
pc: UInt<64>,
insn: RenamedInsnData<MOp, UnitOutRegNum<OutRegNumWidth>, RetireQueueIndexWidth>,
src_ready_flags: Array<Bool, { COMMON_MOP_SRC_LEN }>,
}
@ -166,7 +124,7 @@ impl<OpIndexWidth: Size> InFlightOpsSummary<OpIndexWidth> {
fn new<MOp: Type>(
op_index: usize,
op_index_ty: UIntType<OpIndexWidth>,
in_flight_op: impl ToExpr<Type = HdlOption<InFlightOp<MOp>>>,
in_flight_op: impl ToExpr<Type = HdlOption<InFlightOp<MOp, DynSize, DynSize>>>,
) -> Expr<Self> {
let empty_op_index = wire_with_loc(
&format!("empty_op_index_{op_index}"),
@ -183,10 +141,9 @@ impl<OpIndexWidth: Size> InFlightOpsSummary<OpIndexWidth> {
#[hdl]
if let HdlSome(in_flight_op) = in_flight_op {
#[hdl]
let InFlightOp::<_> {
let InFlightOp::<_, _, _> {
state,
mop: _,
pc: _,
insn: _,
src_ready_flags,
} = in_flight_op;
connect(ready_op_index, HdlOption[op_index_ty].HdlNone());
@ -224,13 +181,15 @@ impl<OpIndexWidth: Size> InFlightOpsSummary<OpIndexWidth> {
impl InFlightOpsSummary<DynSize> {
fn summarize<MOp: Type, MaxInFlight: Size>(
in_flight_ops: impl ToExpr<Type = ArrayType<HdlOption<InFlightOp<MOp>>, MaxInFlight>>,
in_flight_ops: impl ToExpr<
Type = ArrayType<HdlOption<InFlightOp<MOp, DynSize, DynSize>>, MaxInFlight>,
>,
) -> Expr<Self> {
let in_flight_ops = in_flight_ops.to_expr();
let max_in_flight = Expr::ty(in_flight_ops).len();
let index_range = 0..max_in_flight;
let index_ty = UInt::range(index_range.clone());
tree_reduce(
reduce(
index_range.map(|i| Self::new(i, index_ty, in_flight_ops[i])),
Self::combine,
)
@ -239,10 +198,7 @@ impl InFlightOpsSummary<DynSize> {
}
#[hdl_module]
pub fn unit_base<
MOp: Type + MOpTrait<DestReg = UnitOutRegNum<DynSize>, SrcRegWidth = DynSize>,
ExtraOut: Type,
>(
pub fn unit_base<MOp: Type + MOpTrait<DestReg = (), SrcRegWidth = DynSize>, ExtraOut: Type>(
config: &CpuConfig,
unit_index: usize,
mop_ty: MOp,
@ -251,18 +207,20 @@ pub fn unit_base<
#[hdl]
let cd: ClockDomain = m.input();
#[hdl]
let unit_to_reg_alloc: UnitToRegAlloc<MOp, ExtraOut, DynSize, DynSize, DynSize> =
let unit_to_reg_alloc: UnitToRegAlloc<MOp, ExtraOut, DynSize, DynSize, DynSize, DynSize> =
m.output(config.unit_to_reg_alloc(mop_ty, extra_out_ty));
#[hdl]
let execute_start: ReadyValid<ExecuteStart<MOp>> = m.output(ReadyValid[ExecuteStart[mop_ty]]);
let execute_start: ReadyValid<ExecuteStart<MOp, DynSize, DynSize>> =
m.output(ReadyValid[config.execute_start(mop_ty)]);
#[hdl]
let execute_end: HdlOption<ExecuteEnd<DynSize, ExtraOut>> =
m.input(HdlOption[ExecuteEnd[config.out_reg_num_width][extra_out_ty]]);
let execute_end: HdlOption<ExecuteEnd<DynSize, DynSize, ExtraOut>> =
m.input(HdlOption[config.execute_end(extra_out_ty)]);
connect(execute_start.data, Expr::ty(execute_start).data.HdlNone());
let max_in_flight = config.unit_max_in_flight(unit_index).get();
let in_flight_op_ty = InFlightOp[mop_ty];
let in_flight_op_ty =
InFlightOp[mop_ty][config.out_reg_num_width][config.retire_queue_index_width()];
#[hdl]
let in_flight_ops = reg_builder()
.clock_domain(cd)
@ -361,15 +319,15 @@ pub fn unit_base<
connect(ready_write_port.mask, true);
#[hdl]
if let HdlSome(unit_output_write) = unit_output_writes[unit_index] {
connect_any(write_port.addr, unit_output_write.which.value);
connect_any(write_port.addr, unit_output_write.dest.value);
connect(write_port.data, unit_output_write.value);
connect(write_port.en, true);
connect_any(ready_write_port.addr, unit_output_write.which.value);
connect_any(ready_write_port.addr, unit_output_write.dest.value);
connect(ready_write_port.en, true);
let p_reg_num = #[hdl]
PRegNum::<_, _> {
unit_num: config.unit_num().from_index(unit_index),
unit_out_reg: unit_output_write.which,
unit_out_reg: unit_output_write.dest,
};
for src_index in 0..COMMON_MOP_SRC_LEN {
#[hdl]
@ -399,9 +357,8 @@ pub fn unit_base<
execute_start.data,
HdlSome(
#[hdl]
ExecuteStart::<_> {
mop: in_flight_op.mop,
pc: in_flight_op.pc,
ExecuteStart::<_, _, _> {
insn: in_flight_op.insn,
src_values: read_src_values,
},
),
@ -420,7 +377,12 @@ pub fn unit_base<
#[hdl]
if let HdlSome(input) = ReadyValid::firing_data(unit_to_reg_alloc.input) {
#[hdl]
let UnitInput::<_> { mop, pc } = input;
let RenamedInsnData::<_, _, _> {
retire_queue_index,
pc: _,
dest: _,
mop,
} = input;
#[hdl]
let input_mop_src_regs = wire(mop_ty.src_regs_ty());
connect(
@ -436,20 +398,24 @@ pub fn unit_base<
connect(src_ready_flags, input_src_regs_valid);
connect(input_src_regs, input_mop_src_regs);
#[hdl]
if unit_to_reg_alloc.cancel_input.cmp_ne(HdlSome(
#[hdl]
UnitCancelInput::<_> {
which: MOp::dest_reg(mop),
},
)) {
let input_is_canceled = wire();
connect(input_is_canceled, false);
#[hdl]
if let HdlSome(cancel_input) = unit_to_reg_alloc.cancel_input {
connect(
input_is_canceled,
UnitCancelInput::is_canceled(cancel_input, retire_queue_index),
);
}
#[hdl]
if !input_is_canceled {
connect(
input_in_flight_op,
HdlSome(
#[hdl]
InFlightOp::<_> {
InFlightOp::<_, _, _> {
state: InFlightOpState.Ready(),
mop,
pc,
insn: input,
src_ready_flags,
},
),
@ -483,13 +449,11 @@ pub fn unit_base<
#[hdl]
if let HdlSome(in_flight_op) = in_flight_ops[in_flight_op_index] {
#[hdl]
let InFlightOp::<_> {
let InFlightOp::<_, _, _> {
state,
mop,
pc,
insn,
src_ready_flags,
} = in_flight_op;
let which = MOp::dest_reg(mop);
let src_regs = wire_with_loc(
&format!("in_flight_op_src_regs_{in_flight_op_index}"),
SourceLocation::caller(),
@ -499,7 +463,7 @@ pub fn unit_base<
src_regs,
repeat(config.p_reg_num().const_zero().cast_to_bits(), ConstUsize),
);
MOp::connect_src_regs(mop, src_regs);
MOp::connect_src_regs(insn.mop, src_regs);
#[hdl]
if in_flight_ops_summary.ready_op_index.cmp_eq(HdlSome(
@ -517,7 +481,7 @@ pub fn unit_base<
if let HdlSome(unit_output_write) = unit_output_writes[unit_index] {
#[hdl]
let UnitOutputWrite::<_> {
which: unit_out_reg,
dest: unit_out_reg,
value: _,
} = unit_output_write;
let p_reg_num = #[hdl]
@ -537,20 +501,21 @@ pub fn unit_base<
}
}
connect(
in_flight_op_canceling[in_flight_op_index],
unit_to_reg_alloc.cancel_input.cmp_eq(HdlSome(
#[hdl]
UnitCancelInput::<_> { which },
)),
);
connect(in_flight_op_canceling[in_flight_op_index], false);
#[hdl]
if let HdlSome(cancel_input) = unit_to_reg_alloc.cancel_input {
connect(
in_flight_op_canceling[in_flight_op_index],
UnitCancelInput::is_canceled(cancel_input, insn.retire_queue_index),
);
}
#[hdl]
if let HdlSome(execute_end) = execute_end {
#[hdl]
let ExecuteEnd::<_, _> { unit_output } = execute_end;
let ExecuteEnd::<_, _, _> { unit_output } = execute_end;
#[hdl]
if which.cmp_eq(unit_output.which) {
if insn.dest.cmp_eq(unit_output.dest) {
connect(in_flight_op_execute_ending[in_flight_op_index], true);
#[hdl]
if !in_flight_op_canceling[in_flight_op_index] {
@ -567,7 +532,7 @@ pub fn unit_base<
#[hdl]
if let HdlSome(execute_start) = ReadyValid::firing_data(execute_start) {
#[hdl]
if which.cmp_eq(MOp::dest_reg(execute_start.mop)) {
if insn.dest.cmp_eq(execute_start.insn.dest) {
connect(in_flight_op_execute_starting[in_flight_op_index], true);
}
}
@ -594,10 +559,9 @@ pub fn unit_base<
in_flight_ops[in_flight_op_index],
HdlSome(
#[hdl]
InFlightOp::<_> {
InFlightOp::<_, _, _> {
state,
mop,
pc,
insn,
src_ready_flags: in_flight_op_next_src_ready_flags[in_flight_op_index],
},
),

View file

@ -2,7 +2,6 @@
// See Notices.txt for copyright information
pub mod array_vec;
pub mod tree_reduce;
pub(crate) const fn range_u32_len(range: &std::ops::Range<u32>) -> usize {
let retval = range.end.saturating_sub(range.start);
@ -25,3 +24,16 @@ pub(crate) const fn range_u32_nth_or_panic(range: &std::ops::Range<u32>, index:
panic!("index out of range")
}
}
/// Returns the overlap of the half-open ranges `a` and `b`, or `None` when they share no value.
///
/// The overlap starts at the larger of the two starts and ends at the smaller of the two
/// ends; an empty result (including when either input is itself empty) is reported as `None`.
pub(crate) const fn range_intersection(
    a: &std::ops::Range<u32>,
    b: &std::ops::Range<u32>,
) -> Option<std::ops::Range<u32>> {
    // `Ord::max` / `Ord::min` are not usable in a `const fn`, hence the manual selection.
    let lower = if b.start > a.start { b.start } else { a.start };
    let upper = if b.end < a.end { b.end } else { a.end };
    if lower >= upper {
        return None;
    }
    Some(lower..upper)
}

View file

@ -2,8 +2,11 @@
// See Notices.txt for copyright information
use fayalite::{
expr::ops::{ExprCastTo, ExprIndex, ExprPartialEq, ExprPartialOrd},
int::SizeType,
expr::{
ops::{ExprCastTo, ExprIndex, ExprPartialEq, ExprPartialOrd},
ToLiteralBits,
},
int::{IntType, SizeType},
intern::{Intern, Interned},
prelude::*,
ty::{MatchVariantWithoutScope, StaticType, TypeProperties},
@ -249,6 +252,29 @@ impl<T: Type, N: Size> ArrayVec<T, N> {
});
array_vec_as_array_of_options
}
/// Reads the element at `index`, yielding `HdlNone` when `index` is not below the
/// current length of `this`.
///
/// `index` may be any integer type whose dynamic form is `UInt`; it is converted with
/// `Expr::as_dyn_int` before use.
///
/// When `index >= capacity` can be evaluated to a literal at elaboration time and is
/// true, a constant `HdlNone` is returned without creating any wire.
#[hdl]
pub fn get<Idx: IntType<Dyn = UInt>>(
this: impl ToExpr<Type = Self>,
index: impl ToExpr<Type = Idx>,
) -> Expr<HdlOption<T>> {
let this = this.to_expr();
let index = Expr::as_dyn_int(index.to_expr());
// compare against the capacity from the type; this may fold to a literal
let never_in_bounds = index.cmp_ge(Expr::ty(this).capacity());
if let Ok(never_in_bounds) = never_in_bounds.to_literal_bits() {
if never_in_bounds[0] {
// avoid error from out-of-bounds constant index
return HdlOption[Expr::ty(this).element()].HdlNone();
}
}
// result wire: defaults to `HdlNone`, overridden below when `index` is in bounds
#[hdl]
let array_vec_get = wire(HdlOption[Expr::ty(this).element()]);
connect(array_vec_get, Expr::ty(array_vec_get).HdlNone());
// only indexes below the current length (not merely below the capacity) produce `HdlSome`
#[hdl]
if index.cmp_lt(Length::as_uint(Self::len(this))) {
connect(array_vec_get, HdlSome(this.elements[index]));
}
array_vec_get
}
}
impl<T: Type, N: Size, Idx, IdxWidth: Size> ExprIndex<Idx> for ArrayVec<T, N>
@ -263,3 +289,35 @@ where
<ArrayType<T, N> as ExprIndex<Idx>>::expr_index(&this.elements, index)
}
}
/// Bundle pairing an [`ArrayVec`] of data with a flipped [`Length`] named `ready`.
///
/// [`Self::firing_len`] yields the smaller of `data.len` and `ready`, and
/// [`Self::firing_data`] yields `data` truncated to that length.
#[hdl]
pub struct ReadyValidArray<T: Type, N: Size> {
// the offered elements, together with how many of them are present
pub data: ArrayVec<T, N>,
// travels in the opposite direction to `data`
// NOTE(review): presumably the number of elements the receiving side can accept -- confirm
#[hdl(flip)]
pub ready: Length<N>,
}
impl<T: Type, N: Size> ReadyValidArray<T, N> {
/// Returns the smaller of `this.data.len` and `this.ready` as a new wire named `firing_len`.
///
/// # Panics
///
/// Panics if the length type of `data` differs from the type of `ready`.
#[hdl]
pub fn firing_len(this: impl ToExpr<Type = Self>) -> Expr<Length<N>> {
let this = this.to_expr();
assert_eq!(Expr::ty(this).data.len_ty(), Expr::ty(this).ready);
#[hdl]
let firing_len = wire(Expr::ty(this).data.len);
// default to the full data length ...
connect(firing_len, this.data.len);
// ... and clamp to `ready` when more data is offered than `ready` allows
#[hdl]
if this.data.len.cmp_gt(this.ready) {
connect(firing_len, this.ready);
}
firing_len
}
/// Returns a copy of `this.data` whose length is replaced by [`Self::firing_len`], as a
/// new wire named `firing_data`.
#[hdl]
pub fn firing_data(this: impl ToExpr<Type = Self>) -> Expr<ArrayVec<T, N>> {
let this = this.to_expr();
#[hdl]
let firing_data = wire(Expr::ty(this).data);
connect(firing_data, this.data);
// this later connect overrides the `len` copied by the whole-bundle connect above
connect(firing_data.len, Self::firing_len(this));
firing_data
}
}

View file

@ -1,152 +0,0 @@
// SPDX-License-Identifier: LGPL-3.0-or-later
// See Notices.txt for copyright information
/// One step of a stack-based tree reduction.
#[derive(Copy, Clone, PartialEq, Eq, Hash, Debug)]
pub enum TreeReduceOp {
    /// Take the next input item and push it onto the stack.
    Input,
    /// Pop the two topmost stack values and push their combination.
    Reduce,
}

/// Bookkeeping for one partial result currently on the stack.
#[derive(Copy, Clone, Debug)]
struct Entry {
    /// Index of the first input covered by this partial result.
    start: usize,
    /// The partial result spans at most `1 << depth` inputs.
    depth: u32,
}

/// Iterator over the [`TreeReduceOp`] steps needed to reduce `len` inputs as a tree.
///
/// For `len` inputs it yields `len` `Input` steps interleaved with `len - 1` `Reduce`
/// steps, and nothing at all when `len` is zero.
#[derive(Clone, Debug)]
pub struct TreeReduceOps {
    len: usize,
    stack: Vec<Entry>,
}

impl TreeReduceOps {
    /// Creates the step sequence for reducing `len` inputs.
    pub fn new(len: usize) -> Self {
        Self {
            len,
            stack: Vec::new(),
        }
    }

    /// Folds the topmost entry into the one beneath it and reports the `Reduce` step.
    ///
    /// Only called while at least two entries are on the stack.
    fn merge_top(&mut self) -> TreeReduceOp {
        self.stack.pop();
        if let Some(merged) = self.stack.last_mut() {
            merged.depth += 1;
        }
        TreeReduceOp::Reduce
    }
}

impl Iterator for TreeReduceOps {
    type Item = TreeReduceOp;

    fn next(&mut self) -> Option<Self::Item> {
        let Some(&top) = self.stack.last() else {
            // Nothing consumed yet: start with the first input, unless there are none.
            if self.len == 0 {
                return None;
            }
            self.stack.push(Entry { start: 0, depth: 0 });
            return Some(TreeReduceOp::Input);
        };
        let below = self.stack.iter().rev().nth(1).copied();

        // Two neighbouring subtrees of equal depth are always combined first.
        if below.is_some_and(|below| below.depth == top.depth) {
            return Some(self.merge_top());
        }

        // Otherwise keep consuming inputs while some remain past the top entry's span.
        let span = 1usize << top.depth;
        if self.len - top.start > span {
            self.stack.push(Entry {
                start: top.start + span,
                depth: 0,
            });
            return Some(TreeReduceOp::Input);
        }

        // Inputs are exhausted: collapse what is left, finishing once a single entry remains.
        below.map(|_| self.merge_top())
    }
}
/// Reduces the items of `iter` as a tree, threading `state` through every callback.
///
/// Each item is first converted with `input`; pairs of intermediate results are then
/// combined with `reduce`, in the order dictated by [`TreeReduceOps`]. Returns `None`
/// when `iter` is empty.
///
/// # Panics
///
/// Panics if `iter` yields fewer items than its `len()` reported.
#[track_caller]
pub fn tree_reduce_with_state<S, I, R>(
    iter: impl IntoIterator<IntoIter: ExactSizeIterator, Item = I>,
    state: &mut S,
    mut input: impl FnMut(&mut S, I) -> R,
    mut reduce: impl FnMut(&mut S, R, R) -> R,
) -> Option<R> {
    let mut pending: Vec<R> = Vec::new();
    let mut items = iter.into_iter();
    for step in TreeReduceOps::new(items.len()) {
        let produced = match step {
            TreeReduceOp::Input => {
                let item = items
                    .next()
                    .expect("inconsistent iterator len() and next()");
                input(state, item)
            }
            TreeReduceOp::Reduce => {
                // The right operand was pushed last, so it comes off the stack first.
                let (Some(rhs), Some(lhs)) = (pending.pop(), pending.pop()) else {
                    unreachable!();
                };
                reduce(state, lhs, rhs)
            }
        };
        pending.push(produced);
    }
    pending.pop()
}
/// Reduces the items of `iter` as a tree using `reduce`, returning `None` when `iter` is empty.
///
/// This is [`tree_reduce_with_state`] with a unit state and with the items used as-is.
pub fn tree_reduce<T>(
    iter: impl IntoIterator<Item = T, IntoIter: ExactSizeIterator>,
    mut reduce: impl FnMut(T, T) -> T,
) -> Option<T> {
    let mut unit_state = ();
    tree_reduce_with_state(
        iter,
        &mut unit_state,
        |_state, item| item,
        move |_state, lhs, rhs| reduce(lhs, rhs),
    )
}
#[cfg(test)]
mod tests {
    use super::*;
    use std::ops::Range;

    /// Reference implementation: records the steps of reducing `range` by recursively
    /// splitting it at half of its length rounded up to a power of two.
    fn recursive_tree_reduce(range: Range<usize>, ops: &mut Vec<TreeReduceOp>) {
        match range.len() {
            0 => {}
            1 => ops.push(TreeReduceOp::Input),
            len => {
                let split = range.start + len.next_power_of_two() / 2;
                recursive_tree_reduce(range.start..split, ops);
                recursive_tree_reduce(split..range.end, ops);
                ops.push(TreeReduceOp::Reduce);
            }
        }
    }

    #[test]
    fn test_tree_reduce() {
        // Hand-written step sequences for lengths 0 through 8.
        const EXPECTED: &[&[TreeReduceOp]] = {
            use TreeReduceOp::{Input as I, Reduce as R};
            &[
                &[],
                &[I],
                &[I, I, R],
                &[I, I, R, I, R],
                &[I, I, R, I, I, R, R],
                &[I, I, R, I, I, R, R, I, R],
                &[I, I, R, I, I, R, R, I, I, R, R],
                &[I, I, R, I, I, R, R, I, I, R, I, R, R],
                &[I, I, R, I, I, R, R, I, I, R, I, I, R, R, R],
            ]
        };
        for len in 0..64 {
            let mut expected = vec![];
            recursive_tree_reduce(0..len, &mut expected);
            // The reference implementation itself is checked against the table where available.
            if let Some(&expected2) = EXPECTED.get(len) {
                assert_eq!(*expected, *expected2, "len={len}");
            }
            assert_eq!(
                TreeReduceOps::new(len).collect::<Vec<_>>(),
                expected,
                "len={len}"
            );
            // Concatenation is order-sensitive, so this also checks that operands are
            // combined left-to-right.
            let seq: Vec<_> = (0..len).collect();
            assert_eq!(
                seq,
                tree_reduce(seq.iter().map(|&v| vec![v]), |mut l, r| {
                    l.extend_from_slice(&r);
                    l
                })
                .unwrap_or_default(),
                "len={len}"
            );
        }
    }
}