3
0
Fork 0
mirror of https://github.com/YosysHQ/yosys synced 2025-04-23 09:05:32 +00:00

add PolarFire FPGA support

This commit is contained in:
Chun Lin Min 2024-07-02 12:44:30 -07:00
parent a739e21a5f
commit acddc36389
59 changed files with 5389 additions and 0 deletions

View file

@ -37,6 +37,17 @@ $(eval $(call add_extra_objs,passes/pmgen/xilinx_dsp_cascade_pm.h))
# --------------------------------------
OBJS += passes/pmgen/mchp_dsp.o
GENFILES += passes/pmgen/mchp_dsp_pm.h
GENFILES += passes/pmgen/mchp_dsp_CREG_pm.h
GENFILES += passes/pmgen/mchp_dsp_cascade_pm.h
passes/pmgen/mchp_dsp.o: passes/pmgen/mchp_dsp_pm.h passes/pmgen/mchp_dsp_CREG_pm.h passes/pmgen/mchp_dsp_cascade_pm.h
$(eval $(call add_extra_objs,passes/pmgen/mchp_dsp_pm.h))
$(eval $(call add_extra_objs,passes/pmgen/mchp_dsp_CREG_pm.h))
$(eval $(call add_extra_objs,passes/pmgen/mchp_dsp_cascade_pm.h))
# --------------------------------------
OBJS += passes/pmgen/peepopt.o
GENFILES += passes/pmgen/peepopt_pm.h
passes/pmgen/peepopt.o: passes/pmgen/peepopt_pm.h

373
passes/pmgen/mchp_dsp.cc Normal file
View file

@ -0,0 +1,373 @@
/*
ISC License
Copyright (C) 2024 Microchip Technology Inc. and its subsidiaries
Permission to use, copy, modify, and/or distribute this software for any
purpose with or without fee is hereby granted, provided that the above
copyright notice and this permission notice appear in all copies.
THE SOFTWARE IS PROVIDED "AS IS" AND THE AUTHOR DISCLAIMS ALL WARRANTIES
WITH REGARD TO THIS SOFTWARE INCLUDING ALL IMPLIED WARRANTIES OF
MERCHANTABILITY AND FITNESS. IN NO EVENT SHALL THE AUTHOR BE LIABLE FOR
ANY SPECIAL, DIRECT, INDIRECT, OR CONSEQUENTIAL DAMAGES OR ANY DAMAGES
WHATSOEVER RESULTING FROM LOSS OF USE, DATA OR PROFITS, WHETHER IN AN
ACTION OF CONTRACT, NEGLIGENCE OR OTHER TORTIOUS ACTION, ARISING OUT OF
OR IN CONNECTION WITH THE USE OR PERFORMANCE OF THIS SOFTWARE.
*/
#include "kernel/yosys.h"
#include "kernel/sigtools.h"
#include <deque>
USING_YOSYS_NAMESPACE
PRIVATE_NAMESPACE_BEGIN
#include "passes/pmgen/mchp_dsp_pm.h"
#include "passes/pmgen/mchp_dsp_CREG_pm.h"
#include "passes/pmgen/mchp_dsp_cascade_pm.h"
void mchp_dsp_pack(mchp_dsp_pm &pm)
{
auto &st = pm.st_mchp_dsp_pack;
log("Analysing %s.%s for MCHP MACC_PA packing.\n", log_id(pm.module), log_id(st.dsp));
Cell *cell = st.dsp;
//pack pre-adder
if (st.preAdderStatic) {
SigSpec &pasub = cell->connections_.at(ID(PASUB));
log(" static PASUB preadder %s (%s)\n", log_id(st.preAdderStatic), log_id(st.preAdderStatic->type));
bool D_SIGNED = st.preAdderStatic->getParam(ID::B_SIGNED).as_bool();
bool B_SIGNED = st.preAdderStatic->getParam(ID::A_SIGNED).as_bool();
st.sigB.extend_u0(18, B_SIGNED);
st.sigD.extend_u0(18, D_SIGNED);
if (st.moveBtoA)
{
cell->setPort(ID::A, st.sigA); // if pre-adder feeds into A, original sigB will be moved to port A
}
cell->setPort(ID::B, st.sigB);
cell->setPort(ID::D, st.sigD);
// MACC_PA supports both addition and subtraction with the pre-adder.
// Affects the sign of the 'D' port.
if (st.preAdderStatic->type == ID($add))
pasub[0] = State::S0;
else if (st.preAdderStatic->type == ID($sub))
pasub[0] = State::S1;
else
log_assert(!"strange pre-adder type");
pm.autoremove(st.preAdderStatic);
}
//pack post-adder
if (st.postAdderStatic) {
log(" postadder %s (%s)\n", log_id(st.postAdderStatic), log_id(st.postAdderStatic->type));
SigSpec &sub = cell->connections_.at(ID(SUB));
// Post-adder in MACC_PA also supports subtraction
// Determines the sign of the output from the multiplier.
if (st.postAdderStatic->type == ID($add))
sub[0] = State::S0;
else if (st.postAdderStatic->type == ID($sub))
sub[0] = State::S1;
else
log_assert(!"strange post-adder type");
if (st.useFeedBack) {
cell->setPort(ID(CDIN_FDBK_SEL), {State::S0, State::S1});
} else {
st.sigC.extend_u0(48, st.postAdderStatic->getParam(ID::A_SIGNED).as_bool());
cell->setPort(ID::C, st.sigC);
}
pm.autoremove(st.postAdderStatic);
}
// pack registers
if (st.clock != SigBit())
{
cell->setPort(ID::CLK, st.clock);
// function to absorb a register
auto f = [&pm,cell](SigSpec &A, Cell* ff, IdString ceport, IdString rstport, IdString bypass) {
// input/output ports
SigSpec D = ff->getPort(ID::D);
SigSpec Q = pm.sigmap(ff->getPort(ID::Q));
if (!A.empty())
A.replace(Q, D);
if (rstport != IdString()) {
if (ff->type.in(ID($sdff), ID($sdffe))) {
SigSpec srst = ff->getPort(ID::SRST);
bool rstpol_n = !ff->getParam(ID::SRST_POLARITY).as_bool();
// active low sync rst
cell->setPort(rstport, rstpol_n ? srst : pm.module->Not(NEW_ID, srst));
} else if (ff->type.in(ID($adff), ID($adffe))) {
SigSpec arst = ff->getPort(ID::ARST);
bool rstpol_n = !ff->getParam(ID::ARST_POLARITY).as_bool();
// active low async rst
cell->setPort(rstport, rstpol_n ? arst : pm.module->Not(NEW_ID, arst));
} else {
// active low async/sync rst
cell->setPort(rstport, State::S1);
}
}
if (ff->type.in(ID($dffe), ID($sdffe), ID($adffe))) {
SigSpec ce = ff->getPort(ID::EN);
bool cepol = ff->getParam(ID::EN_POLARITY).as_bool();
// enables are all active high
cell->setPort(ceport, cepol ? ce : pm.module->Not(NEW_ID, ce));
}
else {
// enables are all active high
cell->setPort(ceport, State::S1);
}
// bypass set to 0
cell->setPort(bypass, State::S0);
for (auto c : Q.chunks()) {
auto it = c.wire->attributes.find(ID::init);
if (it == c.wire->attributes.end())
continue;
for (int i = c.offset; i < c.offset+c.width; i++) {
log_assert(it->second[i] == State::S0 || it->second[i] == State::Sx);
it->second[i] = State::Sx;
}
}
};
// NOTE: flops are not autoremoved because it is possible that they
// are only partially absorbed into DSP, or have fanouts.
if (st.ffA) {
SigSpec A = cell->getPort(ID::A);
if (st.ffA) {
f(A, st.ffA, ID(A_EN), ID(A_SRST_N), ID(A_BYPASS));
}
pm.add_siguser(A, cell);
cell->setPort(ID::A, A);
}
if (st.ffB) {
SigSpec B = cell->getPort(ID::B);
if (st.ffB) {
f(B, st.ffB, ID(B_EN), ID(B_SRST_N), ID(B_BYPASS));
}
pm.add_siguser(B, cell);
cell->setPort(ID::B, B);
}
if (st.ffD) {
SigSpec D = cell->getPort(ID::D);
if (st.ffD->type.in(ID($adff), ID($adffe))) {
f(D, st.ffD, ID(D_EN), ID(D_ARST_N), ID(D_BYPASS));
} else {
f(D, st.ffD, ID(D_EN), ID(D_SRST_N), ID(D_BYPASS));
}
pm.add_siguser(D, cell);
cell->setPort(ID::D, D);
}
if (st.ffP) {
SigSpec P; // unused
f(P, st.ffP, ID(P_EN), ID(P_SRST_N), ID(P_BYPASS));
st.ffP->connections_.at(ID::Q).replace(st.sigP, pm.module->addWire(NEW_ID, GetSize(st.sigP)));
}
log(" clock: %s (%s)\n", log_signal(st.clock), "posedge");
if (st.ffA)
log(" \t ffA:%s\n", log_id(st.ffA));
if (st.ffB)
log(" \t ffB:%s\n", log_id(st.ffB));
if (st.ffD)
log(" \t ffD:%s\n", log_id(st.ffD));
if (st.ffP)
log(" \t ffP:%s\n", log_id(st.ffP));
}
log("\n");
SigSpec P = st.sigP;
if (GetSize(P) < 48)
P.append(pm.module->addWire(NEW_ID, 48-GetSize(P)));
cell->setPort(ID::P, P);
pm.blacklist(cell);
}
// For packing cascaded DSPs
void mchp_dsp_packC(mchp_dsp_CREG_pm &pm)
{
auto &st = pm.st_mchp_dsp_packC;
log_debug("Analysing %s.%s for MCHP DSP packing (REG_C).\n", log_id(pm.module), log_id(st.dsp));
log_debug("ffC: %s\n", log_id(st.ffC, "--"));
Cell *cell = st.dsp;
if (st.clock != SigBit())
{
cell->setPort(ID::CLK, st.clock);
// same function as above, used for the last CREG we need to absorb
auto f = [&pm,cell](SigSpec &A, Cell* ff, IdString ceport, IdString rstport, IdString bypass) {
// input/output ports
SigSpec D = ff->getPort(ID::D);
SigSpec Q = pm.sigmap(ff->getPort(ID::Q));
if (!A.empty())
A.replace(Q, D);
if (rstport != IdString()) {
if (ff->type.in(ID($sdff), ID($sdffe))) {
SigSpec srst = ff->getPort(ID::SRST);
bool rstpol_n = !ff->getParam(ID::SRST_POLARITY).as_bool();
// active low sync rst
cell->setPort(rstport, rstpol_n ? srst : pm.module->Not(NEW_ID, srst));
} else if (ff->type.in(ID($adff), ID($adffe))) {
SigSpec arst = ff->getPort(ID::ARST);
bool rstpol_n = !ff->getParam(ID::ARST_POLARITY).as_bool();
// active low async rst
cell->setPort(rstport, rstpol_n ? arst : pm.module->Not(NEW_ID, arst));
} else {
// active low async/sync rst
cell->setPort(rstport, State::S1);
}
}
if (ff->type.in(ID($dffe), ID($sdffe), ID($adffe))) {
SigSpec ce = ff->getPort(ID::EN);
bool cepol = ff->getParam(ID::EN_POLARITY).as_bool();
// enables are all active high
cell->setPort(ceport, cepol ? ce : pm.module->Not(NEW_ID, ce));
} else {
// enables are all active high
cell->setPort(ceport, State::S1);
}
// bypass set to 0
cell->setPort(bypass, State::S0);
for (auto c : Q.chunks()) {
auto it = c.wire->attributes.find(ID::init);
if (it == c.wire->attributes.end())
continue;
for (int i = c.offset; i < c.offset+c.width; i++) {
log_assert(it->second[i] == State::S0 || it->second[i] == State::Sx);
it->second[i] = State::Sx;
}
}
};
if (st.ffC) {
SigSpec C = cell->getPort(ID::C);
if (st.ffC->type.in(ID($adff), ID($adffe))) {
f(C, st.ffC, ID(C_EN), ID(C_ARST_N), ID(C_BYPASS));
} else {
f(C, st.ffC, ID(C_EN), ID(C_SRST_N), ID(C_BYPASS));
}
pm.add_siguser(C, cell);
cell->setPort(ID::C, C);
}
log(" clock: %s (%s)", log_signal(st.clock), "posedge");
if (st.ffC)
log(" ffC:%s", log_id(st.ffC));
log("\n");
}
pm.blacklist(cell);
}
struct MchpDspPass : public Pass {
MchpDspPass() : Pass("mchp_dsp", "MCHP: pack resources into DSPs") { }
void help() override
{
// |---v---|---v---|---v---|---v---|---v---|---v---|---v---|---v---|---v---|---v---|
log("\n");
log(" mchp_dsp [options] [selection]\n");
log("\n");
log("Pack input registers 'A', 'B', 'C', and 'D' (with optional enable/reset),\n");
log("output register 'P' (with optional enable/reset), pre-adder and/or post-adder into\n");
log("MCHP DSP resources.\n");
log("\n");
log("Multiply-accumulate operations using the post-adder with feedback on the 'C'\n");
log("input will be folded into the DSP. In this scenario only, the 'C' input can be\n");
log("used to override the current accumulation result with a new value. This will\n");
log("be added to the multiplier result to form the next accumulation result.\n");
log("\n");
log("Use of the dedicated 'PCOUT' -> 'PCIN' cascade path is detected for 'P' -> 'C'\n");
log("connections (optionally, where 'P' is right-shifted by 17-bits and used as an\n");
log("input to the post-adder. This pattern is common for summing partial products to\n");
log("implement wide multipliers). Cascade chains are limited to a mazimum length \n");
log("of 24 cells, corresponding to PolarFire (pf) devices.\n");
log("\n");
log("This pass is a no-op if the scratchpad variable 'mchp_dsp.multonly' is set\n");
log("to 1.\n");
log("\n");
log("\n");
log(" -family {pf}\n");
log(" select the family to target\n");
log(" default: pf\n");
log("\n");
}
void execute(std::vector<std::string> args, RTLIL::Design *design) override
{
log_header(design, "Executing MCHP_DSP pass (pack resources into DSPs).\n");
std::string family = "pf";
size_t argidx;
for (argidx = 1; argidx < args.size(); argidx++)
{
if ((args[argidx] == "-family") && argidx+1 < args.size()) {
family = args[++argidx];
continue;
}
break;
}
extra_args(args, argidx, design);
for (auto module : design->selected_modules()) {
if (design->scratchpad_get_bool("mchp_dsp.multonly"))
continue;
{
// For more details on PolarFire MACC_PA, consult
// the "PolarFire FPGA Macro Library Guide"
// Main pattern matching step to capture a DSP cell.
// Match for pre-adder, post-adder, as well as
// registers 'A', 'B', 'D', and 'P'. Additionally,
// check for an accumulator pattern based on whether
// a post-adder and PREG are both present AND
// if PREG feeds into this post-adder.
mchp_dsp_pm pm(module, module->selected_cells());
pm.run_mchp_dsp_pack(mchp_dsp_pack);
}
// Separating out CREG packing is necessary since there
// is no guarantee that the cell ordering corresponds
// to the "expected" case (i.e. the order in which
// they appear in the source). There existed the possibility
// where a register got packed as a CREG into a
// downstream DSP that should have otherwise been a
// PREG of an upstream DSP that had not been visited
// yet
{
mchp_dsp_CREG_pm pm(module, module->selected_cells());
pm.run_mchp_dsp_packC(mchp_dsp_packC);
}
// Lastly, identify and utilise PCOUT -> PCIN chains
{
mchp_dsp_cascade_pm pm(module, module->selected_cells());
pm.run_mchp_dsp_cascade();
}
}
}
} MchpDspPass;
PRIVATE_NAMESPACE_END

440
passes/pmgen/mchp_dsp.pmg Normal file
View file

@ -0,0 +1,440 @@
// ISC License
//
// Copyright (C) 2024 Microchip Technology Inc. and its subsidiaries
//
// Permission to use, copy, modify, and/or distribute this software for any
// purpose with or without fee is hereby granted, provided that the above
// copyright notice and this permission notice appear in all copies.
//
// THE SOFTWARE IS PROVIDED "AS IS" AND THE AUTHOR DISCLAIMS ALL WARRANTIES
// WITH REGARD TO THIS SOFTWARE INCLUDING ALL IMPLIED WARRANTIES OF
// MERCHANTABILITY AND FITNESS. IN NO EVENT SHALL THE AUTHOR BE LIABLE FOR
// ANY SPECIAL, DIRECT, INDIRECT, OR CONSEQUENTIAL DAMAGES OR ANY DAMAGES
// WHATSOEVER RESULTING FROM LOSS OF USE, DATA OR PROFITS, WHETHER IN AN
// ACTION OF CONTRACT, NEGLIGENCE OR OTHER TORTIOUS ACTION, ARISING OUT OF
// OR IN CONNECTION WITH THE USE OR PERFORMANCE OF THIS SOFTWARE.
// This file describes the main pattern matcher setup (of three total) that
// forms the `mchp_dsp` pass described in mchp_dsp.cc - version for
// DSP48A/DSP48A1 (Spartan 3A DSP, Spartan 6).
// At a high level, it works as follows:
// ( 1) Starting from a DSP cell. Capture DSP configurations as states
// ( 2) Match for pre-adder
// ( 3) Match for post-adder
// ( 4) Match register 'A', 'B', 'D', 'P'
// ( 5) If post-adder and PREG both present, check if PREG feeds into post-adder.
// This indicates an accumulator situation like the ASCII diagram below:
// +--------------------------------+
// |_________ |
// | /-------\ +----+ |
// +----+ +-| post- |___|PREG|---+ 'P'
// |MULT|------ | adder | +----+
// +----+ \-------/
pattern mchp_dsp_pack
state <SigBit> clock
state <SigSpec> sigA sigB sigC sigD sigP
state <Cell*> ffA ffB ffD ffP
state <Cell*> preAdderStatic postAdderStatic
state <bool> moveBtoA useFeedBack
// static ports, used to detect dsp configuration
state <SigSpec> bypassA bypassB bypassC bypassD bypassP
state <SigSpec> bypassPASUB
// Variables used for subpatterns
state <SigSpec> argQ argD
udata <bool> allowAsync
udata <SigSpec> dffD dffQ
udata <SigBit> dffclock
udata <Cell*> dff
udata <Cell*> u_preAdderStatic u_postAdderStatic
udata <IdString> u_postAddAB
state <IdString> postAddAB
// (1) Starting from a DSP cell
match dsp
select dsp->type.in(\MACC_PA)
endmatch
// detect existing signals connected to DSP
// detect configuration ports
code sigA sigB sigC sigD clock sigP
//helper function to remove unused bits
auto unextend = [](const SigSpec &sig) {
int i;
for (i = GetSize(sig)-1; i > 0; i--)
if (sig[i] != sig[i-1])
break;
// Do not remove non-const sign bit
if (sig[i].wire)
++i;
return sig.extract(0, i);
};
//unextend to remove unused bits
sigA = unextend(port(dsp, \A));
sigB = unextend(port(dsp, \B));
//update signals
sigC = port(dsp, \C, SigSpec());
sigD = port(dsp, \D, SigSpec());
SigSpec P = port(dsp, \P);
// Only care about bits that are used
int i;
for (i = GetSize(P)-1; i >= 0; i--)
if (nusers(P[i]) > 1)
break;
i++;
log_assert(nusers(P.extract_end(i)) <= 1);
// This sigP could have no users if downstream sinks (e.g. $add) is
// narrower than $mul result, for example
if (i == 0)
reject;
sigP = P.extract(0, i);
clock = port(dsp, \CLK, SigBit());
endcode
// capture static configuration ports
code bypassA bypassB bypassC bypassD bypassPASUB bypassP
bypassA = port(dsp, \A_BYPASS, SigSpec());
bypassB = port(dsp, \B_BYPASS, SigSpec());
bypassC = port(dsp, \C_BYPASS, SigSpec());
bypassD = port(dsp, \D_BYPASS, SigSpec());
bypassPASUB = port(dsp, \PASUB_BYPASS, SigSpec());
bypassP = port(dsp, \P_BYPASS, SigSpec());
endcode
// (2) Match for pre-adder
//
code sigA sigB sigD preAdderStatic moveBtoA
subpattern(preAddMatching);
preAdderStatic = u_preAdderStatic;
moveBtoA = false;
if (preAdderStatic) {
if (port(preAdderStatic, \Y) == sigA)
{
//used for packing
moveBtoA = true;
// sigA should be the input to the multiplier without the preAdd. sigB and sigD should be
//the preAdd inputs. If our "A" input into the multiplier is from the preAdd (not sigA), then
// we basically swap it.
sigA = port(dsp, \B);
}
// port B of preAdderStatic must be mapped to port D of DSP for subtraction
sigD = port(preAdderStatic, \B);
sigB = port(preAdderStatic, \A);
}
endcode
// (3) Match for post-adder
//
code postAdderStatic sigP sigC
u_postAdderStatic = nullptr;
subpattern(postAddMatching);
postAdderStatic = u_postAdderStatic;
if (postAdderStatic) {
//sigC will be whichever input to the postAdder that is NOT from the multiplier
// u_postAddAB is the input to the postAdder from the multiplier
sigC = port(postAdderStatic, u_postAddAB == \A ? \B : \A);
sigP = port(postAdderStatic, \Y);
}
endcode
// (4) Matching registers
//
// 'A' input for REG_A
code argQ bypassA sigA clock ffA
if (bypassA.is_fully_ones()){
argQ = sigA;
allowAsync = false;
subpattern(in_dffe);
if (dff) {
ffA = dff;
clock = dffclock;
sigA = dffD;
}
}
endcode
// 'B' input for REG_B
code argQ bypassB sigB clock ffB
if (bypassB.is_fully_ones()){
argQ = sigB;
allowAsync = false;
subpattern(in_dffe);
if (dff) {
ffB = dff;
clock = dffclock;
sigB = dffD;
}
}
endcode
// 'D' input for REG_D
code argQ bypassP sigD clock ffD
if (bypassD.is_fully_ones()){
argQ = sigD;
allowAsync = true;
subpattern(in_dffe);
if (dff) {
ffD = dff;
clock = dffclock;
sigD = dffD;
}
}
endcode
// 'P' output for REG_P
code argD ffP sigP clock bypassP
if (bypassP.is_fully_ones() && nusers(sigP) == 2) {
argD = sigP;
allowAsync = false;
subpattern(out_dffe);
if (dff) {
ffP = dff;
clock = dffclock;
sigP = dffQ;
}
}
endcode
// (5) If post-adder and PREG both present, check if PREG feeds into post-adder via port C.
// This indicates an accumulator situation. Port C can be freed
// +--------------------------------+
// |_________ |
// | /-------\ +----+ |
// +----+ +-| post- |___|PREG|---+ 'P'
// |MULT|------ | adder | +----+
// +----+ \-------/
code useFeedBack
useFeedBack = false;
if (postAdderStatic && ffP) {
if (sigC == sigP) {
useFeedBack = true;
}
}
endcode
// if any cells are absorbed, invoke the callback function
code
if (preAdderStatic || postAdderStatic)
accept;
if (ffA || ffB || ffD || ffP)
accept;
endcode
// #######################
// Subpattern for matching against post-adder
// Match 'P' output that exclusively drives one of two inputs to an $add
// cell (post-adder).
// The other input to the adder is assumed to come in from the 'C' input
subpattern postAddMatching
arg sigP
match postAdd
select postAdd->type.in($add, $sub)
select GetSize(port(postAdd, \Y)) <= 48
// AB is the port that connects MUL to ADD
choice <IdString> AB {\A, \B}
select nusers(port(postAdd, AB)) == 2
// has one input coming from multiplier
index <SigBit> port(postAdd, AB)[0] === sigP[0]
filter GetSize(port(postAdd, AB)) >= GetSize(sigP)
filter port(postAdd, AB).extract(0, GetSize(sigP)) == sigP
// Check that remainder of AB is a sign- or zero-extension
filter port(postAdd, AB).extract_end(GetSize(sigP)) == SigSpec(sigP[GetSize(sigP)-1], GetSize(port(postAdd, AB))-GetSize(sigP)) || port(postAdd, AB).extract_end(GetSize(sigP)) == SigSpec(State::S0, GetSize(port(postAdd, AB))-GetSize(sigP))
set postAddAB AB
// optional
endmatch
code
if (postAdd)
{
if (postAdd->type.in(ID($sub)) && postAddAB == \A) {
// if $sub, the multiplier output must match to $sub.B, otherwise no match
} else {
u_postAddAB = postAddAB;
u_postAdderStatic = postAdd;
}
}
endcode
// #######################
// Subpattern for matching against pre-adder
// support static PASUB only
subpattern preAddMatching
arg sigA sigB sigD bypassB bypassD bypassPASUB
code
u_preAdderStatic = nullptr;
// Ensure that preAdder not already used
// Assume we can inspect port D to see if its all zeros.
if (!(sigD.empty() || sigD.is_fully_zero())) reject;
if (!bypassB.is_fully_ones()) reject;
if (!bypassD.is_fully_ones()) reject;
if (!bypassPASUB.is_fully_ones()) reject;
endcode
match preAdd
// can handle add or sub
select preAdd->type.in($add, $sub)
// Output has to be 18 bits or less, and only has single fanout
select GetSize(port(preAdd, \Y)) <= 18
select nusers(port(preAdd, \Y)) == 2
// Adder inputs must be 18 bits or less
select GetSize(port(preAdd, \A)) <= 18
select GetSize(port(preAdd, \B)) <= 18
// Output feeds into one of multiplier input
filter port(preAdd, \Y) == sigB || port(preAdd, \Y) == sigA
// optional
endmatch
code
if (preAdd)
{
u_preAdderStatic = preAdd;
}
endcode
// #######################
// Subpattern for matching against input registers, based on knowledge of the
// 'Q' input.
subpattern in_dffe
arg argQ clock
code
dff = nullptr;
if (argQ.empty())
reject;
for (const auto &c : argQ.chunks()) {
// Abandon matches when 'Q' is a constant
if (!c.wire)
reject;
// Abandon matches when 'Q' has the keep attribute set
if (c.wire->get_bool_attribute(\keep))
reject;
// Abandon matches when 'Q' has a non-zero init attribute set
Const init = c.wire->attributes.at(\init, Const());
if (!init.empty())
for (auto b : init.extract(c.offset, c.width))
if (b != State::Sx && b != State::S0)
reject;
}
endcode
match ff
// reg D has async rst
// reg A, B has sync rst
select ff->type.in($dff, $dffe, $sdff, $sdffe, $adff, $adffe)
// does not support clock inversion
select param(ff, \CLK_POLARITY).as_bool()
// it is possible that only part of a dff output matches argQ
slice offset GetSize(port(ff, \D))
index <SigBit> port(ff, \Q)[offset] === argQ[0]
// Check that the rest of argQ is present
filter GetSize(port(ff, \Q)) >= offset + GetSize(argQ)
filter port(ff, \Q).extract(offset, GetSize(argQ)) == argQ
// only consider async rst flops when flag is set
filter !ff->type.in($adff, $adffe) || allowAsync
// clock must be consistent
filter clock == SigBit() || port(ff, \CLK) == clock
endmatch
code argQ
// Check that reset value, if present, is fully 0.
bool noResetFlop = ff->type.in($dff, $dffe);
bool srstZero = ff->type.in($sdff, $sdffe) && param(ff, \SRST_VALUE).is_fully_zero();
bool arstZero = ff->type.in($adff, $adffe) && param(ff, \ARST_VALUE).is_fully_zero();
bool resetLegal = noResetFlop || srstZero || arstZero;
if (resetLegal)
{
SigSpec Q = port(ff, \Q);
dff = ff;
dffclock = port(ff, \CLK);
dffD = argQ;
SigSpec D = port(ff, \D);
argQ = Q;
dffD.replace(argQ, D);
}
endcode
// #######################
subpattern out_dffe
arg argD argQ clock
code
dff = nullptr;
for (auto c : argD.chunks())
// Abandon matches when 'D' has the keep attribute set
if (c.wire->get_bool_attribute(\keep))
reject;
endcode
match ff
select ff->type.in($dff, $dffe, $sdff, $sdffe)
// does not support clock inversion
select param(ff, \CLK_POLARITY).as_bool()
slice offset GetSize(port(ff, \D))
index <SigBit> port(ff, \D)[offset] === argD[0]
// Check that the rest of argD is present
filter GetSize(port(ff, \D)) >= offset + GetSize(argD)
filter port(ff, \D).extract(offset, GetSize(argD)) == argD
filter clock == SigBit() || port(ff, \CLK) == clock
endmatch
code argQ
SigSpec D = port(ff, \D);
SigSpec Q = port(ff, \Q);
argQ = argD;
argQ.replace(D, Q);
// Abandon matches when 'Q' has a non-zero init attribute set
for (auto c : argQ.chunks()) {
Const init = c.wire->attributes.at(\init, Const());
if (!init.empty())
for (auto b : init.extract(c.offset, c.width))
if (b != State::Sx && b != State::S0)
reject;
}
dff = ff;
dffQ = argQ;
dffclock = port(ff, \CLK);
endcode

View file

@ -0,0 +1,169 @@
// ISC License
//
// Copyright (C) 2024 Microchip Technology Inc. and its subsidiaries
//
// Permission to use, copy, modify, and/or distribute this software for any
// purpose with or without fee is hereby granted, provided that the above
// copyright notice and this permission notice appear in all copies.
//
// THE SOFTWARE IS PROVIDED "AS IS" AND THE AUTHOR DISCLAIMS ALL WARRANTIES
// WITH REGARD TO THIS SOFTWARE INCLUDING ALL IMPLIED WARRANTIES OF
// MERCHANTABILITY AND FITNESS. IN NO EVENT SHALL THE AUTHOR BE LIABLE FOR
// ANY SPECIAL, DIRECT, INDIRECT, OR CONSEQUENTIAL DAMAGES OR ANY DAMAGES
// WHATSOEVER RESULTING FROM LOSS OF USE, DATA OR PROFITS, WHETHER IN AN
// ACTION OF CONTRACT, NEGLIGENCE OR OTHER TORTIOUS ACTION, ARISING OUT OF
// OR IN CONNECTION WITH THE USE OR PERFORMANCE OF THIS SOFTWARE.
// This file describes the second of three pattern matcher setups that
// forms the `mchp_dsp` pass described in mchp_dsp.cc
// At a high level, it works as follows:
// (1) Starting from a DSP cell that (a) doesn't have a CREG already,
// and (b) uses the 'C' port
// (2) Match the driver of the 'C' input to a possible $dff cell (CREG)
// (attached to at most two $mux cells that implement clock-enable or
// reset functionality, using a subpattern discussed below)
// Notes:
// - Running CREG packing after mchp_dsp_pack is necessary since there is no
// guarantee that the cell ordering corresponds to the "expected" case (i.e.
// the order in which they appear in the source) thus the possiblity existed
// where a register got packed as a CREG into a downstream DSP, while it should
// have otherwise been a PREG of an upstream DSP that had not been visited.
// yet.
// - The reason this is separated out from the mchp_dsp.pmg file is
// for efficiency --- each *.pmg file creates a class of the same basename,
// which when constructed, creates a custom database tailored to the
// pattern(s) contained within. Since the pattern in this file must be
// executed after the pattern contained in mchp_dsp.pmg, it is necessary
// to reconstruct this database. Separating the two patterns into
// independent files causes two smaller, more specific, databases.
pattern mchp_dsp_packC
udata <std::function<SigSpec(const SigSpec&)>> unextend
state <SigBit> clock
state <SigSpec> sigC sigP
state <Cell*> ffC
// Variables used for subpatterns
state <SigSpec> argQ argD
state <int> ffoffset
udata <SigSpec> dffD dffQ
udata <SigBit> dffclock
udata <Cell*> dff
// (1) Starting from a DSP cell that (a) doesn't have a CREG already,
// and (b) uses the 'C' port
match dsp
select dsp->type.in(\MACC_PA)
select port(dsp, \C_BYPASS, SigSpec()).is_fully_ones()
select nusers(port(dsp, \C, SigSpec())) > 1
endmatch
code sigC sigP clock
//helper function to remove unused bits
unextend = [](const SigSpec &sig) {
int i;
for (i = GetSize(sig)-1; i > 0; i--)
if (sig[i] != sig[i-1])
break;
// Do not remove non-const sign bit
if (sig[i].wire)
++i;
return sig.extract(0, i);
};
sigC = unextend(port(dsp, \C, SigSpec()));
SigSpec P = port(dsp, \P);
// Only care about those bits that are used
int i;
for (i = GetSize(P)-1; i >= 0; i--)
if (nusers(P[i]) > 1)
break;
i++;
log_assert(nusers(P.extract_end(i)) <= 1);
sigP = P.extract(0, i);
clock = port(dsp, \CLK, SigBit());
endcode
// (2) Match the driver of the 'C' input to a possible $dff cell (CREG)
// (attached to at most two $mux cells that implement clock-enable or
// reset functionality, using the in_dffe subpattern)
code argQ ffC sigC clock
argQ = sigC;
subpattern(in_dffe);
if (dff) {
ffC = dff;
clock = dffclock;
sigC = dffD;
}
endcode
code
if (ffC)
accept;
endcode
// #######################
// Subpattern for matching against input registers, based on knowledge of the
// 'Q' input.
subpattern in_dffe
arg argQ clock
code
dff = nullptr;
if (argQ.empty())
reject;
for (const auto &c : argQ.chunks()) {
// Abandon matches when 'Q' is a constant
if (!c.wire)
reject;
// Abandon matches when 'Q' has the keep attribute set
if (c.wire->get_bool_attribute(\keep))
reject;
// Abandon matches when 'Q' has a non-zero init attribute set
// (not supported by DSP48E1)
Const init = c.wire->attributes.at(\init, Const());
if (!init.empty())
for (auto b : init.extract(c.offset, c.width))
if (b != State::Sx && b != State::S0)
reject;
}
endcode
match ff
select ff->type.in($dff, $dffe, $sdff, $sdffe, $adff, $adffe)
// does not support clock inversion
select param(ff, \CLK_POLARITY).as_bool()
slice offset GetSize(port(ff, \D))
index <SigBit> port(ff, \Q)[offset] === argQ[0]
// Check that the rest of argQ is present
filter GetSize(port(ff, \Q)) >= offset + GetSize(argQ)
filter port(ff, \Q).extract(offset, GetSize(argQ)) == argQ
filter clock == SigBit() || port(ff, \CLK) == clock
endmatch
code argQ
// Check that reset value, if present, is fully 0.
bool noResetFlop = ff->type.in($dff, $dffe);
bool srstZero = ff->type.in($sdff, $sdffe) && param(ff, \SRST_VALUE).is_fully_zero();
bool arstZero = ff->type.in($adff, $adffe) && param(ff, \ARST_VALUE).is_fully_zero();
bool resetLegal = noResetFlop || srstZero || arstZero;
if (resetLegal)
{
SigSpec Q = port(ff, \Q);
dff = ff;
dffclock = port(ff, \CLK);
dffD = argQ;
SigSpec D = port(ff, \D);
argQ = Q;
dffD.replace(argQ, D);
}
endcode

View file

@ -0,0 +1,238 @@
// ISC License
//
// Copyright (C) 2024 Microchip Technology Inc. and its subsidiaries
//
// Permission to use, copy, modify, and/or distribute this software for any
// purpose with or without fee is hereby granted, provided that the above
// copyright notice and this permission notice appear in all copies.
//
// THE SOFTWARE IS PROVIDED "AS IS" AND THE AUTHOR DISCLAIMS ALL WARRANTIES
// WITH REGARD TO THIS SOFTWARE INCLUDING ALL IMPLIED WARRANTIES OF
// MERCHANTABILITY AND FITNESS. IN NO EVENT SHALL THE AUTHOR BE LIABLE FOR
// ANY SPECIAL, DIRECT, INDIRECT, OR CONSEQUENTIAL DAMAGES OR ANY DAMAGES
// WHATSOEVER RESULTING FROM LOSS OF USE, DATA OR PROFITS, WHETHER IN AN
// ACTION OF CONTRACT, NEGLIGENCE OR OTHER TORTIOUS ACTION, ARISING OUT OF
// OR IN CONNECTION WITH THE USE OR PERFORMANCE OF THIS SOFTWARE.
// This file describes the third of three pattern matcher setups that
// forms the `mchp_dsp` pass described in mchp_dsp.cc
// At a high level, it works as follows:
// (1) Starting from a DSP cell that
// (a) CDIN_FDBK_SEL is set to default "00"
// (b) doesn't already use the 'PCOUT' port
// (2) Match another DSP cell that
// (a) does not have the CREG enabled,
// (b) 'C' port is driven by the 'P' output of the previous DSP cell
// (c) has its 'PCIN' port unused
// (3) Recursively go to (2) until no more matches possible, keeping track
// of the longest possible chain found
// (4) The longest chain is then divided into chunks of no more than
// MAX_DSP_CASCADE in length (to prevent long cascades that exceed the
// height of a DSP column) with each DSP in each chunk being rewritten
// to use [ABP]COUT -> [ABP]CIN cascading as appropriate
pattern mchp_dsp_cascade
udata <std::function<SigSpec(const SigSpec&)>> unextend
udata <vector<std::tuple<Cell*,int>>> chain longest_chain
udata <std::set<Cell*>> visited
state <Cell*> next
state <SigSpec> clock
// Variables used for subpatterns
state <SigSpec> argQ argD
state <int> ffoffset
udata <SigSpec> dffD dffQ
udata <SigBit> dffclock
udata <Cell*> dff
// Maximum of 24 cascaded blocks
code
#define MAX_DSP_CASCADE 24
endcode
// NOTE: Chain vector
// +--------+ +--------+
// | first |----> | next | ----> ...
// +--------+ +--------+
// first.COUT cascades to next.CIN, so on and so forth
// Helper function to remove unused bits
code
unextend = [](const SigSpec &sig) {
int i;
for (i = GetSize(sig)-1; i > 0; i--)
if (sig[i] != sig[i-1])
break;
// Do not remove non-const sign bit
if (sig[i].wire)
++i;
return sig.extract(0, i);
};
endcode
// (1) Starting from a DSP cell that
// (a) CDIN_FDBK_SEL is set to default "00"
// (b) doesn't already use the 'PCOUT' port
match first
select first->type.in(\MACC_PA) && port(first, \CDIN_FDBK_SEL, Const(0, 2)) == Const::from_string("00")
select nusers(port(first, \CDOUT, SigSpec())) <= 1
endmatch
// (4) The longest chain is then divided into chunks of no more than
// MAX_DSP_CASCADE in length (to prevent long cascades that exceed the
// height of a DSP column) with each DSP in each chunk being rewritten
// to use [ABP]COUT -> [ABP]CIN cascading as appropriate
code
visited.clear();
visited.insert(first);
longest_chain.clear();
chain.emplace_back(first, -1);
subpattern(tail);
finally
// longest cascade chain has been found with DSP "first" being the head of the chain
// do some post processing
chain.pop_back();
visited.clear();
log_assert(chain.empty());
if (GetSize(longest_chain) > 1) {
Cell *dsp = std::get<0>(longest_chain.front());
Cell *dsp_pcin;
int SHIFT = -1;
for (int i = 1; i < GetSize(longest_chain); i++) {
log_assert(dsp->type.in(\MACC_PA));
std::tie(dsp_pcin,SHIFT) = longest_chain[i];
// Chain length exceeds the maximum cascade length, must split it up
if (i % MAX_DSP_CASCADE > 0) {
Wire *cascade = module->addWire(NEW_ID, 48);
// zero port C and move wire to cascade
dsp_pcin->setPort(ID(C), Const(0, 48));
dsp_pcin->setPort(ID(CDIN), cascade);
dsp->setPort(ID(CDOUT), cascade);
// Configure wire to cascade the dsps
add_siguser(cascade, dsp_pcin);
add_siguser(cascade, dsp);
// configure mux to use cascade for signal E
SigSpec cdin_fdbk_sel = port(dsp_pcin, \CDIN_FDBK_SEL, Const(0, 2));
cdin_fdbk_sel[1] = State::S1;
dsp_pcin->setPort(\CDIN_FDBK_SEL, cdin_fdbk_sel);
// check if shifting is required for wide multiplier implmentation
if (SHIFT == 17)
{
dsp_pcin->setPort(\ARSHFT17, State::S1);
}
log_debug("PCOUT -> PCIN cascade for %s -> %s\n", log_id(dsp), log_id(dsp_pcin));
} else {
log_debug(" Blocking %s -> %s cascade (exceeds max: %d)\n", log_id(dsp), log_id(dsp_pcin), MAX_DSP_CASCADE);
}
dsp = dsp_pcin;
}
accept;
}
endcode
// ------------------------------------------------------------------
subpattern tail
arg first
arg next
// (2) Match another DSP cell that
// (a) does not have the CREG enabled,
// (b) 'C' port is driven by the 'P' output of the previous DSP cell
// (c) has its 'PCIN' port unused
match nextP
// find candidates where nextP.C port is driven (maybe partially) by chain's tail DSP.P port
// and with no registers in between (since cascade path cannot be pipelined)
// reg C must not be used
select port(nextP, \C_BYPASS, SigSpec()).is_fully_ones()
// must be same DSP type
select nextP->type.in(\MACC_PA)
// port C should be driven by something
select nusers(port(nextP, \C, SigSpec())) > 1
// CIN must be unused
select nusers(port(nextP, \PCIN, SigSpec())) == 0
// should not have internal feedback connection
select port(nextP, \CDIN_FDBK_SEL, SigSpec()).is_fully_zero()
// SHIFT should be unused
select port(nextP, \ARSHFT17_BYPASS).is_fully_ones()
select port(nextP, \ARSHFT17).is_fully_zero()
select nusers(port(nextP, \ARSHFT17, SigSpec())) == 0
// current DSP cell can be cascaded with the back of the cascade chain
// index <SigBit> port(nextP, \C)[0] === port(std::get<0>(chain.back()), \P)[0] || port(nextP, \C)[0] === port(std::get<0>(chain.back()), \P)[17]
filter port(nextP, \C)[0] == port(std::get<0>(chain.back()), \P)[0] || port(nextP, \C)[0] == port(std::get<0>(chain.back()), \P)[17]
// semioptional
optional
endmatch
code next
next = nextP;
// keep DSP type consistent in the chain
// currently since we only have one type anyways, this line is always false
if (next && next->type != first->type) reject;
// break infinite recursion when there's a combinational loop
if (visited.count(next) > 0) reject;
endcode
// (3) Recursively go to (2) until no more matches possible, recording the
// longest possible chain
code
if (next) {
SigSpec driver_sigP = port(std::get<0>(chain.back()), \P);
int shift = 0;
if (port(next, \C)[0] == port(std::get<0>(chain.back()), \P)[17]) shift = 17;
chain.emplace_back(next, shift);
visited.insert(next);
SigSpec sigC = unextend(port(next, \C));
// Make sure driverDSP.P === DSP.C
if (GetSize(sigC) + shift <= GetSize(driver_sigP) && driver_sigP.extract(shift, GetSize(sigC)) == sigC)
{
subpattern(tail);
}
} else {
if (GetSize(chain) > GetSize(longest_chain))
longest_chain = chain;
}
finally
if (next)
{
visited.erase(next);
chain.pop_back();
}
endcode