3
0
Fork 0
mirror of https://github.com/YosysHQ/yosys synced 2025-04-24 01:25:33 +00:00

pmgen: Move passes out of pmgen folder

- Techlib pmgens are now in relevant techlibs/*.
- `peepopt` pmgens are now in passes/opt.
- `test_pmgen` is still in passes/pmgen.
- Update `Makefile.inc` and `.gitignore` file(s) to match new `*_pm.h` location,
  as well as the `#include`s.
- Change default `%_pm.h` make target to `techlibs/%_pm.h` and move it to the
  top level Makefile.
- Update pmgen target to use `$(notdir $*)` (where `$*` is the part of the file
  name that matched the '%' in the target) instead of `$(subst _pm.h,,$(notdir
  $@))`.
This commit is contained in:
Krystine Sherwin 2025-01-31 15:06:09 +13:00
parent 18a7c00382
commit 0ec5f1b756
No known key found for this signature in database
31 changed files with 71 additions and 77 deletions

1
techlibs/.gitignore vendored
View file

@ -1 +1,2 @@
blackbox.v
*_pm.h

View file

@ -14,3 +14,13 @@ $(eval $(call add_share_file,share/ice40,techlibs/ice40/spram.txt))
$(eval $(call add_share_file,share/ice40,techlibs/ice40/spram_map.v))
$(eval $(call add_share_file,share/ice40,techlibs/ice40/dsp_map.v))
$(eval $(call add_share_file,share/ice40,techlibs/ice40/abc9_model.v))
OBJS += techlibs/ice40/ice40_dsp.o
GENFILES += techlibs/ice40/ice40_dsp_pm.h
techlibs/ice40/ice40_dsp.o: techlibs/ice40/ice40_dsp_pm.h
$(eval $(call add_extra_objs,techlibs/ice40/ice40_dsp_pm.h))
OBJS += techlibs/ice40/ice40_wrapcarry.o
GENFILES += techlibs/ice40/ice40_wrapcarry_pm.h
techlibs/ice40/ice40_wrapcarry.o: techlibs/ice40/ice40_wrapcarry_pm.h
$(eval $(call add_extra_objs,techlibs/ice40/ice40_wrapcarry_pm.h))

319
techlibs/ice40/ice40_dsp.cc Normal file
View file

@ -0,0 +1,319 @@
/*
* yosys -- Yosys Open SYnthesis Suite
*
* Copyright (C) 2012 Claire Xenia Wolf <claire@yosyshq.com>
*
* Permission to use, copy, modify, and/or distribute this software for any
* purpose with or without fee is hereby granted, provided that the above
* copyright notice and this permission notice appear in all copies.
*
* THE SOFTWARE IS PROVIDED "AS IS" AND THE AUTHOR DISCLAIMS ALL WARRANTIES
* WITH REGARD TO THIS SOFTWARE INCLUDING ALL IMPLIED WARRANTIES OF
* MERCHANTABILITY AND FITNESS. IN NO EVENT SHALL THE AUTHOR BE LIABLE FOR
* ANY SPECIAL, DIRECT, INDIRECT, OR CONSEQUENTIAL DAMAGES OR ANY DAMAGES
* WHATSOEVER RESULTING FROM LOSS OF USE, DATA OR PROFITS, WHETHER IN AN
* ACTION OF CONTRACT, NEGLIGENCE OR OTHER TORTIOUS ACTION, ARISING OUT OF
* OR IN CONNECTION WITH THE USE OR PERFORMANCE OF THIS SOFTWARE.
*
*/
#include "kernel/yosys.h"
#include "kernel/sigtools.h"
USING_YOSYS_NAMESPACE
PRIVATE_NAMESPACE_BEGIN
#include "techlibs/ice40/ice40_dsp_pm.h"
void create_ice40_dsp(ice40_dsp_pm &pm)
{
auto &st = pm.st_ice40_dsp;
log("Checking %s.%s for iCE40 DSP inference.\n", log_id(pm.module), log_id(st.mul));
log_debug("ffA: %s\n", log_id(st.ffA, "--"));
log_debug("ffB: %s\n", log_id(st.ffB, "--"));
log_debug("ffCD: %s\n", log_id(st.ffCD, "--"));
log_debug("mul: %s\n", log_id(st.mul, "--"));
log_debug("ffFJKG: %s\n", log_id(st.ffFJKG, "--"));
log_debug("ffH: %s\n", log_id(st.ffH, "--"));
log_debug("add: %s\n", log_id(st.add, "--"));
log_debug("mux: %s\n", log_id(st.mux, "--"));
log_debug("ffO: %s\n", log_id(st.ffO, "--"));
log_debug("\n");
if (GetSize(st.sigA) > 16) {
log(" input A (%s) is too large (%d > 16).\n", log_signal(st.sigA), GetSize(st.sigA));
return;
}
if (GetSize(st.sigB) > 16) {
log(" input B (%s) is too large (%d > 16).\n", log_signal(st.sigB), GetSize(st.sigB));
return;
}
if (GetSize(st.sigO) > 33) {
log(" adder/accumulator (%s) is too large (%d > 33).\n", log_signal(st.sigO), GetSize(st.sigO));
return;
}
if (GetSize(st.sigH) > 32) {
log(" output (%s) is too large (%d > 32).\n", log_signal(st.sigH), GetSize(st.sigH));
return;
}
Cell *cell = st.mul;
if (cell->type == ID($mul)) {
log(" replacing %s with SB_MAC16 cell.\n", log_id(st.mul->type));
cell = pm.module->addCell(NEW_ID, ID(SB_MAC16));
pm.module->swap_names(cell, st.mul);
}
else log_assert(cell->type == ID(SB_MAC16));
// SB_MAC16 Input Interface
SigSpec A = st.sigA;
A.extend_u0(16, st.mul->getParam(ID::A_SIGNED).as_bool());
log_assert(GetSize(A) == 16);
SigSpec B = st.sigB;
B.extend_u0(16, st.mul->getParam(ID::B_SIGNED).as_bool());
log_assert(GetSize(B) == 16);
SigSpec CD = st.sigCD;
if (CD.empty())
CD = RTLIL::Const(0, 32);
else
log_assert(GetSize(CD) == 32);
cell->setPort(ID::A, A);
cell->setPort(ID::B, B);
cell->setPort(ID::C, CD.extract(16, 16));
cell->setPort(ID::D, CD.extract(0, 16));
cell->setParam(ID(A_REG), st.ffA ? State::S1 : State::S0);
cell->setParam(ID(B_REG), st.ffB ? State::S1 : State::S0);
cell->setParam(ID(C_REG), st.ffCD ? State::S1 : State::S0);
cell->setParam(ID(D_REG), st.ffCD ? State::S1 : State::S0);
SigSpec AHOLD, BHOLD, CDHOLD;
if (st.ffA && st.ffA->hasPort(ID::EN))
AHOLD = st.ffA->getParam(ID::EN_POLARITY).as_bool() ? pm.module->Not(NEW_ID, st.ffA->getPort(ID::EN)) : st.ffA->getPort(ID::EN);
else
AHOLD = State::S0;
if (st.ffB && st.ffB->hasPort(ID::EN))
BHOLD = st.ffB->getParam(ID::EN_POLARITY).as_bool() ? pm.module->Not(NEW_ID, st.ffB->getPort(ID::EN)) : st.ffB->getPort(ID::EN);
else
BHOLD = State::S0;
if (st.ffCD && st.ffCD->hasPort(ID::EN))
CDHOLD = st.ffCD->getParam(ID::EN_POLARITY).as_bool() ? pm.module->Not(NEW_ID, st.ffCD->getPort(ID::EN)) : st.ffCD->getPort(ID::EN);
else
CDHOLD = State::S0;
cell->setPort(ID(AHOLD), AHOLD);
cell->setPort(ID(BHOLD), BHOLD);
cell->setPort(ID(CHOLD), CDHOLD);
cell->setPort(ID(DHOLD), CDHOLD);
SigSpec IRSTTOP, IRSTBOT;
if (st.ffA && st.ffA->hasPort(ID::ARST))
IRSTTOP = st.ffA->getParam(ID::ARST_POLARITY).as_bool() ? st.ffA->getPort(ID::ARST) : pm.module->Not(NEW_ID, st.ffA->getPort(ID::ARST));
else
IRSTTOP = State::S0;
if (st.ffB && st.ffB->hasPort(ID::ARST))
IRSTBOT = st.ffB->getParam(ID::ARST_POLARITY).as_bool() ? st.ffB->getPort(ID::ARST) : pm.module->Not(NEW_ID, st.ffB->getPort(ID::ARST));
else
IRSTBOT = State::S0;
cell->setPort(ID(IRSTTOP), IRSTTOP);
cell->setPort(ID(IRSTBOT), IRSTBOT);
if (st.clock != SigBit())
{
cell->setPort(ID::CLK, st.clock);
cell->setPort(ID(CE), State::S1);
cell->setParam(ID(NEG_TRIGGER), st.clock_pol ? State::S0 : State::S1);
log(" clock: %s (%s)", log_signal(st.clock), st.clock_pol ? "posedge" : "negedge");
if (st.ffA)
log(" ffA:%s", log_id(st.ffA));
if (st.ffB)
log(" ffB:%s", log_id(st.ffB));
if (st.ffCD)
log(" ffCD:%s", log_id(st.ffCD));
if (st.ffFJKG)
log(" ffFJKG:%s", log_id(st.ffFJKG));
if (st.ffH)
log(" ffH:%s", log_id(st.ffH));
if (st.ffO)
log(" ffO:%s", log_id(st.ffO));
log("\n");
}
else
{
cell->setPort(ID::CLK, State::S0);
cell->setPort(ID(CE), State::S0);
cell->setParam(ID(NEG_TRIGGER), State::S0);
}
// SB_MAC16 Cascade Interface
cell->setPort(ID(SIGNEXTIN), State::Sx);
cell->setPort(ID(SIGNEXTOUT), pm.module->addWire(NEW_ID));
cell->setPort(ID::CI, State::Sx);
cell->setPort(ID(ACCUMCI), State::Sx);
cell->setPort(ID(ACCUMCO), pm.module->addWire(NEW_ID));
// SB_MAC16 Output Interface
SigSpec O = st.sigO;
int O_width = GetSize(O);
if (O_width == 33) {
log_assert(st.add);
// If we have a signed multiply-add, then perform sign extension
if (st.add->getParam(ID::A_SIGNED).as_bool() && st.add->getParam(ID::B_SIGNED).as_bool())
pm.module->connect(O[32], O[31]);
else
cell->setPort(ID::CO, O[32]);
O.remove(O_width-1);
}
else
cell->setPort(ID::CO, pm.module->addWire(NEW_ID));
log_assert(GetSize(O) <= 32);
if (GetSize(O) < 32)
O.append(pm.module->addWire(NEW_ID, 32-GetSize(O)));
cell->setPort(ID::O, O);
bool accum = false;
if (st.add) {
accum = (st.ffO && st.add->getPort(st.addAB == ID::A ? ID::B : ID::A) == st.sigO);
if (accum)
log(" accumulator %s (%s)\n", log_id(st.add), log_id(st.add->type));
else
log(" adder %s (%s)\n", log_id(st.add), log_id(st.add->type));
cell->setPort(ID(ADDSUBTOP), st.add->type == ID($add) ? State::S0 : State::S1);
cell->setPort(ID(ADDSUBBOT), st.add->type == ID($add) ? State::S0 : State::S1);
} else {
cell->setPort(ID(ADDSUBTOP), State::S0);
cell->setPort(ID(ADDSUBBOT), State::S0);
}
SigSpec OHOLD;
if (st.ffO && st.ffO->hasPort(ID::EN))
OHOLD = st.ffO->getParam(ID::EN_POLARITY).as_bool() ? pm.module->Not(NEW_ID, st.ffO->getPort(ID::EN)) : st.ffO->getPort(ID::EN);
else
OHOLD = State::S0;
cell->setPort(ID(OHOLDTOP), OHOLD);
cell->setPort(ID(OHOLDBOT), OHOLD);
SigSpec ORST;
if (st.ffO && st.ffO->hasPort(ID::ARST))
ORST = st.ffO->getParam(ID::ARST_POLARITY).as_bool() ? st.ffO->getPort(ID::ARST) : pm.module->Not(NEW_ID, st.ffO->getPort(ID::ARST));
else
ORST = State::S0;
cell->setPort(ID(ORSTTOP), ORST);
cell->setPort(ID(ORSTBOT), ORST);
SigSpec acc_reset = State::S0;
if (st.mux) {
if (st.muxAB == ID::A)
acc_reset = st.mux->getPort(ID::S);
else
acc_reset = pm.module->Not(NEW_ID, st.mux->getPort(ID::S));
} else if (st.ffO && st.ffO->hasPort(ID::SRST)) {
acc_reset = st.ffO->getParam(ID::SRST_POLARITY).as_bool() ? st.ffO->getPort(ID::SRST) : pm.module->Not(NEW_ID, st.ffO->getPort(ID::SRST));
}
cell->setPort(ID(OLOADTOP), acc_reset);
cell->setPort(ID(OLOADBOT), acc_reset);
// SB_MAC16 Remaining Parameters
cell->setParam(ID(TOP_8x8_MULT_REG), st.ffFJKG ? State::S1 : State::S0);
cell->setParam(ID(BOT_8x8_MULT_REG), st.ffFJKG ? State::S1 : State::S0);
cell->setParam(ID(PIPELINE_16x16_MULT_REG1), st.ffFJKG ? State::S1 : State::S0);
cell->setParam(ID(PIPELINE_16x16_MULT_REG2), st.ffH ? State::S1 : State::S0);
cell->setParam(ID(TOPADDSUB_LOWERINPUT), Const(2, 2));
cell->setParam(ID(TOPADDSUB_UPPERINPUT), accum ? State::S0 : State::S1);
cell->setParam(ID(TOPADDSUB_CARRYSELECT), Const(3, 2));
cell->setParam(ID(BOTADDSUB_LOWERINPUT), Const(2, 2));
cell->setParam(ID(BOTADDSUB_UPPERINPUT), accum ? State::S0 : State::S1);
cell->setParam(ID(BOTADDSUB_CARRYSELECT), Const(0, 2));
cell->setParam(ID(MODE_8x8), State::S0);
cell->setParam(ID::A_SIGNED, st.mul->getParam(ID::A_SIGNED).as_bool());
cell->setParam(ID::B_SIGNED, st.mul->getParam(ID::B_SIGNED).as_bool());
if (st.ffO) {
if (st.o_lo)
cell->setParam(ID(TOPOUTPUT_SELECT), Const(st.add ? 0 : 3, 2));
else
cell->setParam(ID(TOPOUTPUT_SELECT), Const(1, 2));
st.ffO->connections_.at(ID::Q).replace(O, pm.module->addWire(NEW_ID, GetSize(O)));
cell->setParam(ID(BOTOUTPUT_SELECT), Const(1, 2));
}
else {
cell->setParam(ID(TOPOUTPUT_SELECT), Const(st.add ? 0 : 3, 2));
cell->setParam(ID(BOTOUTPUT_SELECT), Const(st.add ? 0 : 3, 2));
}
if (cell != st.mul)
pm.autoremove(st.mul);
else
pm.blacklist(st.mul);
pm.autoremove(st.ffFJKG);
pm.autoremove(st.add);
}
struct Ice40DspPass : public Pass {
Ice40DspPass() : Pass("ice40_dsp", "iCE40: map multipliers") { }
void help() override
{
// |---v---|---v---|---v---|---v---|---v---|---v---|---v---|---v---|---v---|---v---|
log("\n");
log(" ice40_dsp [options] [selection]\n");
log("\n");
log("Map multipliers ($mul/SB_MAC16) and multiply-accumulate ($mul/SB_MAC16 + $add)\n");
log("cells into iCE40 DSP resources.\n");
log("Currently, only the 16x16 multiply mode is supported and not the 2 x 8x8 mode.\n");
log("\n");
log("Pack input registers (A, B, {C,D}; with optional hold), pipeline registers\n");
log("({F,J,K,G}, H), output registers (O -- full 32-bits or lower 16-bits only; with\n");
log("optional hold), and post-adder into the SB_MAC16 resource.\n");
log("\n");
log("Multiply-accumulate operations using the post-adder with feedback on the {C,D}\n");
log("input will be folded into the DSP. In this scenario only, resetting the\n");
log("the accumulator to an arbitrary value can be inferred to use the {C,D} input.\n");
log("\n");
}
void execute(std::vector<std::string> args, RTLIL::Design *design) override
{
log_header(design, "Executing ICE40_DSP pass (map multipliers).\n");
size_t argidx;
for (argidx = 1; argidx < args.size(); argidx++)
{
// if (args[argidx] == "-singleton") {
// singleton_mode = true;
// continue;
// }
break;
}
extra_args(args, argidx, design);
for (auto module : design->selected_modules())
ice40_dsp_pm(module, module->selected_cells()).run_ice40_dsp(create_ice40_dsp);
}
} Ice40DspPass;
PRIVATE_NAMESPACE_END

View file

@ -0,0 +1,417 @@
pattern ice40_dsp
state <SigBit> clock
state <bool> clock_pol cd_signed o_lo
state <SigSpec> sigA sigB sigCD sigH sigO
state <Cell*> add mux
state <IdString> addAB muxAB
state <Cell*> ffA ffB ffCD
state <Cell*> ffFJKG ffH ffO
// subpattern
state <bool> argSdff
state <SigSpec> argQ argD
udata <SigSpec> dffD dffQ
udata <SigBit> dffclock
udata <Cell*> dff
udata <bool> dffclock_pol
match mul
select mul->type.in($mul, \SB_MAC16)
select GetSize(mul->getPort(\A)) + GetSize(mul->getPort(\B)) > 10
endmatch
code sigA sigB sigH
auto unextend = [](const SigSpec &sig) {
int i;
for (i = GetSize(sig)-1; i > 0; i--)
if (sig[i] != sig[i-1])
break;
// Do not remove sign bit
++i;
return sig.extract(0, i);
};
sigA = unextend(port(mul, \A));
sigB = unextend(port(mul, \B));
SigSpec O;
if (mul->type == $mul)
O = mul->getPort(\Y);
else if (mul->type == \SB_MAC16)
O = mul->getPort(\O);
else log_abort();
if (GetSize(O) <= 10)
reject;
// Only care about those bits that are used
int i;
for (i = 0; i < GetSize(O); i++) {
if (nusers(O[i]) <= 1)
break;
sigH.append(O[i]);
}
// This sigM could have no users if downstream sinks (e.g. $add) is
// narrower than $mul result, for example
if (i == 0)
reject;
log_assert(nusers(O.extract_end(i)) <= 1);
endcode
code argQ ffA sigA clock clock_pol
if (mul->type != \SB_MAC16 || !param(mul, \A_REG).as_bool()) {
argQ = sigA;
subpattern(in_dffe);
if (dff) {
ffA = dff;
clock = dffclock;
clock_pol = dffclock_pol;
sigA = dffD;
}
}
endcode
code argQ ffB sigB clock clock_pol
if (mul->type != \SB_MAC16 || !param(mul, \B_REG).as_bool()) {
argQ = sigB;
subpattern(in_dffe);
if (dff) {
ffB = dff;
clock = dffclock;
clock_pol = dffclock_pol;
sigB = dffD;
}
}
endcode
code argD argSdff ffFJKG sigH clock clock_pol
if (nusers(sigH) == 2 &&
(mul->type != \SB_MAC16 ||
(!param(mul, \TOP_8x8_MULT_REG).as_bool() && !param(mul, \BOT_8x8_MULT_REG).as_bool() && !param(mul, \PIPELINE_16x16_MULT_REG1).as_bool() && !param(mul, \PIPELINE_16x16_MULT_REG1).as_bool()))) {
argD = sigH;
argSdff = false;
subpattern(out_dffe);
if (dff) {
// F/J/K/G do not have a CE-like (hold) input
if (dff->hasPort(\EN))
goto reject_ffFJKG;
// Reset signal of F/J (IRSTTOP) and K/G (IRSTBOT)
// shared with A and B
if (ffA) {
if (ffA->hasPort(\ARST) != dff->hasPort(\ARST))
goto reject_ffFJKG;
if (ffA->hasPort(\ARST)) {
if (port(ffA, \ARST) != port(dff, \ARST))
goto reject_ffFJKG;
if (param(ffA, \ARST_POLARITY) != param(dff, \ARST_POLARITY))
goto reject_ffFJKG;
}
}
if (ffB) {
if (ffB->hasPort(\ARST) != dff->hasPort(\ARST))
goto reject_ffFJKG;
if (ffB->hasPort(\ARST)) {
if (port(ffB, \ARST) != port(dff, \ARST))
goto reject_ffFJKG;
if (param(ffB, \ARST_POLARITY) != param(dff, \ARST_POLARITY))
goto reject_ffFJKG;
}
}
ffFJKG = dff;
clock = dffclock;
clock_pol = dffclock_pol;
sigH = dffQ;
reject_ffFJKG: ;
}
}
endcode
code argD argSdff ffH sigH sigO clock clock_pol
if (ffFJKG && nusers(sigH) == 2 &&
(mul->type != \SB_MAC16 || !param(mul, \PIPELINE_16x16_MULT_REG2).as_bool())) {
argD = sigH;
argSdff = false;
subpattern(out_dffe);
if (dff) {
// H does not have a CE-like (hold) input
if (dff->hasPort(\EN))
goto reject_ffH;
// Reset signal of H (IRSTBOT) shared with B
if (ffB->hasPort(\ARST) != dff->hasPort(\ARST))
goto reject_ffH;
if (ffB->hasPort(\ARST)) {
if (port(ffB, \ARST) != port(dff, \ARST))
goto reject_ffH;
if (param(ffB, \ARST_POLARITY) != param(dff, \ARST_POLARITY))
goto reject_ffH;
}
ffH = dff;
clock = dffclock;
clock_pol = dffclock_pol;
sigH = dffQ;
reject_ffH: ;
}
}
sigO = sigH;
endcode
match add
if mul->type != \SB_MAC16 || (param(mul, \TOPOUTPUT_SELECT).as_int() == 3 && param(mul, \BOTOUTPUT_SELECT).as_int() == 3)
select add->type.in($add)
choice <IdString> AB {\A, \B}
select nusers(port(add, AB)) == 2
index <SigBit> port(add, AB)[0] === sigH[0]
filter GetSize(port(add, AB)) <= GetSize(sigH)
filter port(add, AB) == sigH.extract(0, GetSize(port(add, AB)))
filter nusers(sigH.extract_end(GetSize(port(add, AB)))) <= 1
set addAB AB
optional
endmatch
code sigCD sigO cd_signed
if (add) {
sigCD = port(add, addAB == \A ? \B : \A);
cd_signed = param(add, addAB == \A ? \B_SIGNED : \A_SIGNED).as_bool();
int natural_mul_width = GetSize(sigA) + GetSize(sigB);
int actual_mul_width = GetSize(sigH);
int actual_acc_width = GetSize(sigCD);
if ((actual_acc_width > actual_mul_width) && (natural_mul_width > actual_mul_width))
reject;
// If accumulator, check adder width and signedness
if (sigCD == sigH && (actual_acc_width != actual_mul_width) && (param(mul, \A_SIGNED).as_bool() != param(add, \A_SIGNED).as_bool()))
reject;
sigO = port(add, \Y);
}
endcode
match mux
select mux->type == $mux
choice <IdString> AB {\A, \B}
select nusers(port(mux, AB)) == 2
index <SigSpec> port(mux, AB) === sigO
set muxAB AB
optional
endmatch
code sigO
if (mux)
sigO = port(mux, \Y);
endcode
code argD argSdff ffO sigO sigCD clock clock_pol cd_signed o_lo
if (mul->type != \SB_MAC16 ||
// Ensure that register is not already used
((param(mul, \TOPOUTPUT_SELECT).as_int() != 1 && param(mul, \BOTOUTPUT_SELECT).as_int() != 1) &&
// Ensure that OLOADTOP/OLOADBOT is unused or zero
(port(mul, \OLOADTOP, State::S0).is_fully_zero() && port(mul, \OLOADBOT, State::S0).is_fully_zero()))) {
dff = nullptr;
// First try entire sigO
if (nusers(sigO) == 2) {
argD = sigO;
argSdff = !mux;
subpattern(out_dffe);
}
// Otherwise try just its least significant 16 bits
if (!dff && GetSize(sigO) > 16) {
argD = sigO.extract(0, 16);
if (nusers(argD) == 2) {
argSdff = !mux;
subpattern(out_dffe);
o_lo = dff;
}
}
if (dff) {
ffO = dff;
clock = dffclock;
clock_pol = dffclock_pol;
sigO.replace(sigO.extract(0, GetSize(dffQ)), dffQ);
}
// Loading value into output register is not
// supported unless using accumulator
if (mux) {
if (sigCD != sigO)
reject;
sigCD = port(mux, muxAB == \B ? \A : \B);
cd_signed = add && param(add, \A_SIGNED).as_bool() && param(add, \B_SIGNED).as_bool();
} else if (dff && dff->hasPort(\SRST)) {
if (sigCD != sigO)
reject;
sigCD = param(dff, \SRST_VALUE);
cd_signed = add && param(add, \A_SIGNED).as_bool() && param(add, \B_SIGNED).as_bool();
}
}
endcode
code argQ ffCD sigCD clock clock_pol
if (!sigCD.empty() && sigCD != sigO &&
(mul->type != \SB_MAC16 || (!param(mul, \C_REG).as_bool() && !param(mul, \D_REG).as_bool()))) {
argQ = sigCD;
subpattern(in_dffe);
if (dff) {
// Reset signal of C (IRSTTOP) and D (IRSTBOT)
// shared with A and B
if (ffA) {
if (ffA->hasPort(\ARST) != dff->hasPort(\ARST))
goto reject_ffCD;
if (ffA->hasPort(\ARST)) {
if (port(ffA, \ARST) != port(dff, \ARST))
goto reject_ffCD;
if (param(ffA, \ARST_POLARITY) != param(dff, \ARST_POLARITY))
goto reject_ffCD;
}
}
if (ffB) {
if (ffB->hasPort(\ARST) != dff->hasPort(\ARST))
goto reject_ffCD;
if (ffB->hasPort(\ARST)) {
if (port(ffB, \ARST) != port(dff, \ARST))
goto reject_ffCD;
if (param(ffB, \ARST_POLARITY) != param(dff, \ARST_POLARITY))
goto reject_ffCD;
}
}
ffCD = dff;
clock = dffclock;
clock_pol = dffclock_pol;
sigCD = dffD;
reject_ffCD: ;
}
}
endcode
code sigCD
sigCD.extend_u0(32, cd_signed);
endcode
code
accept;
endcode
// #######################
subpattern in_dffe
arg argD argQ clock clock_pol
code
dff = nullptr;
if (argQ.empty())
reject;
for (auto c : argQ.chunks()) {
if (!c.wire)
reject;
if (c.wire->get_bool_attribute(\keep))
reject;
Const init = c.wire->attributes.at(\init, State::Sx);
if (!init.is_fully_undef() && !init.is_fully_zero())
reject;
}
endcode
match ff
select ff->type.in($dff, $dffe)
// DSP48E1 does not support clock inversion
select param(ff, \CLK_POLARITY).as_bool()
slice offset GetSize(port(ff, \D))
index <SigBit> port(ff, \Q)[offset] === argQ[0]
// Check that the rest of argQ is present
filter GetSize(port(ff, \Q)) >= offset + GetSize(argQ)
filter port(ff, \Q).extract(offset, GetSize(argQ)) == argQ
endmatch
code argQ argD
{
if (clock != SigBit()) {
if (port(ff, \CLK)[0] != clock)
reject;
if (param(ff, \CLK_POLARITY).as_bool() != clock_pol)
reject;
}
SigSpec Q = port(ff, \Q);
dff = ff;
dffclock = port(ff, \CLK);
dffclock_pol = param(ff, \CLK_POLARITY).as_bool();
dffD = argQ;
argD = port(ff, \D);
argQ = Q;
dffD.replace(argQ, argD);
}
endcode
// #######################
subpattern out_dffe
arg argD argSdff argQ clock clock_pol
code
dff = nullptr;
for (auto c : argD.chunks())
if (c.wire->get_bool_attribute(\keep))
reject;
endcode
match ff
select ff->type.in($dff, $dffe, $sdff, $sdffce)
// SB_MAC16 does not support clock inversion
select param(ff, \CLK_POLARITY).as_bool()
slice offset GetSize(port(ff, \D))
index <SigBit> port(ff, \D)[offset] === argD[0]
// Only allow sync reset if requested.
filter argSdff || ff->type.in($dff, $dffe)
// Check that the rest of argD is present
filter GetSize(port(ff, \D)) >= offset + GetSize(argD)
filter port(ff, \D).extract(offset, GetSize(argD)) == argD
endmatch
code argQ
if (ff) {
if (clock != SigBit()) {
if (port(ff, \CLK)[0] != clock)
reject;
if (param(ff, \CLK_POLARITY).as_bool() != clock_pol)
reject;
}
SigSpec D = port(ff, \D);
SigSpec Q = port(ff, \Q);
argQ = argD;
argQ.replace(D, Q);
for (auto c : argQ.chunks()) {
Const init = c.wire->attributes.at(\init, State::Sx);
if (!init.is_fully_undef() && !init.is_fully_zero())
reject;
}
dff = ff;
dffQ = argQ;
dffclock = port(ff, \CLK);
dffclock_pol = param(ff, \CLK_POLARITY).as_bool();
}
endcode

View file

@ -0,0 +1,158 @@
/*
* yosys -- Yosys Open SYnthesis Suite
*
* Copyright (C) 2012 Claire Xenia Wolf <claire@yosyshq.com>
*
* Permission to use, copy, modify, and/or distribute this software for any
* purpose with or without fee is hereby granted, provided that the above
* copyright notice and this permission notice appear in all copies.
*
* THE SOFTWARE IS PROVIDED "AS IS" AND THE AUTHOR DISCLAIMS ALL WARRANTIES
* WITH REGARD TO THIS SOFTWARE INCLUDING ALL IMPLIED WARRANTIES OF
* MERCHANTABILITY AND FITNESS. IN NO EVENT SHALL THE AUTHOR BE LIABLE FOR
* ANY SPECIAL, DIRECT, INDIRECT, OR CONSEQUENTIAL DAMAGES OR ANY DAMAGES
* WHATSOEVER RESULTING FROM LOSS OF USE, DATA OR PROFITS, WHETHER IN AN
* ACTION OF CONTRACT, NEGLIGENCE OR OTHER TORTIOUS ACTION, ARISING OUT OF
* OR IN CONNECTION WITH THE USE OR PERFORMANCE OF THIS SOFTWARE.
*
*/
#include "kernel/yosys.h"
#include "kernel/sigtools.h"
USING_YOSYS_NAMESPACE
PRIVATE_NAMESPACE_BEGIN
#include "techlibs/ice40/ice40_wrapcarry_pm.h"
void create_ice40_wrapcarry(ice40_wrapcarry_pm &pm)
{
auto &st = pm.st_ice40_wrapcarry;
#if 0
log("\n");
log("carry: %s\n", log_id(st.carry, "--"));
log("lut: %s\n", log_id(st.lut, "--"));
#endif
log(" replacing SB_LUT + SB_CARRY with $__ICE40_CARRY_WRAPPER cell.\n");
Cell *cell = pm.module->addCell(NEW_ID, ID($__ICE40_CARRY_WRAPPER));
pm.module->swap_names(cell, st.carry);
cell->setPort(ID::A, st.carry->getPort(ID(I0)));
cell->setPort(ID::B, st.carry->getPort(ID(I1)));
auto CI = st.carry->getPort(ID::CI);
cell->setPort(ID::CI, CI);
cell->setPort(ID::CO, st.carry->getPort(ID::CO));
cell->setPort(ID(I0), st.lut->getPort(ID(I0)));
auto I3 = st.lut->getPort(ID(I3));
if (pm.sigmap(CI) == pm.sigmap(I3)) {
cell->setParam(ID(I3_IS_CI), State::S1);
I3 = State::Sx;
}
else
cell->setParam(ID(I3_IS_CI), State::S0);
cell->setPort(ID(I3), I3);
cell->setPort(ID::O, st.lut->getPort(ID::O));
cell->setParam(ID::LUT, st.lut->getParam(ID(LUT_INIT)));
for (const auto &a : st.carry->attributes)
cell->attributes[stringf("\\SB_CARRY.%s", a.first.c_str())] = a.second;
for (const auto &a : st.lut->attributes)
cell->attributes[stringf("\\SB_LUT4.%s", a.first.c_str())] = a.second;
cell->attributes[ID(SB_LUT4.name)] = Const(st.lut->name.str());
if (st.carry->get_bool_attribute(ID::keep) || st.lut->get_bool_attribute(ID::keep))
cell->attributes[ID::keep] = true;
pm.autoremove(st.carry);
pm.autoremove(st.lut);
}
struct Ice40WrapCarryPass : public Pass {
Ice40WrapCarryPass() : Pass("ice40_wrapcarry", "iCE40: wrap carries") { }
void help() override
{
// |---v---|---v---|---v---|---v---|---v---|---v---|---v---|---v---|---v---|---v---|
log("\n");
log(" ice40_wrapcarry [selection]\n");
log("\n");
log("Wrap manually instantiated SB_CARRY cells, along with their associated SB_LUT4s,\n");
log("into an internal $__ICE40_CARRY_WRAPPER cell for preservation across technology\n");
log("mapping.\n");
log("\n");
log("Attributes on both cells will have their names prefixed with 'SB_CARRY.' or\n");
log("'SB_LUT4.' and attached to the wrapping cell.\n");
log("A (* keep *) attribute on either cell will be logically OR-ed together.\n");
log("\n");
log(" -unwrap\n");
log(" unwrap $__ICE40_CARRY_WRAPPER cells back into SB_CARRYs and SB_LUT4s,\n");
log(" including restoring their attributes.\n");
log("\n");
}
void execute(std::vector<std::string> args, RTLIL::Design *design) override
{
bool unwrap = false;
log_header(design, "Executing ICE40_WRAPCARRY pass (wrap carries).\n");
size_t argidx;
for (argidx = 1; argidx < args.size(); argidx++)
{
if (args[argidx] == "-unwrap") {
unwrap = true;
continue;
}
break;
}
extra_args(args, argidx, design);
for (auto module : design->selected_modules()) {
if (!unwrap)
ice40_wrapcarry_pm(module, module->selected_cells()).run_ice40_wrapcarry(create_ice40_wrapcarry);
else {
for (auto cell : module->selected_cells()) {
if (cell->type != ID($__ICE40_CARRY_WRAPPER))
continue;
auto carry = module->addCell(NEW_ID, ID(SB_CARRY));
carry->setPort(ID(I0), cell->getPort(ID::A));
carry->setPort(ID(I1), cell->getPort(ID::B));
carry->setPort(ID::CI, cell->getPort(ID::CI));
carry->setPort(ID::CO, cell->getPort(ID::CO));
module->swap_names(carry, cell);
auto lut_name = cell->attributes.at(ID(SB_LUT4.name), Const(NEW_ID.str())).decode_string();
auto lut = module->addCell(lut_name, ID($lut));
lut->setParam(ID::WIDTH, 4);
lut->setParam(ID::LUT, cell->getParam(ID::LUT));
auto I3 = cell->getPort(cell->getParam(ID(I3_IS_CI)).as_bool() ? ID::CI : ID(I3));
lut->setPort(ID::A, { I3, cell->getPort(ID::B), cell->getPort(ID::A), cell->getPort(ID(I0)) });
lut->setPort(ID::Y, cell->getPort(ID::O));
Const src;
for (const auto &a : cell->attributes)
if (a.first.begins_with("\\SB_CARRY.\\"))
carry->attributes[a.first.c_str() + strlen("\\SB_CARRY.")] = a.second;
else if (a.first.begins_with("\\SB_LUT4.\\"))
lut->attributes[a.first.c_str() + strlen("\\SB_LUT4.")] = a.second;
else if (a.first == ID::src)
src = a.second;
else if (a.first.in(ID(SB_LUT4.name), ID::keep, ID::module_not_derived))
continue;
else
log_abort();
if (!src.empty()) {
carry->attributes.insert(std::make_pair(ID::src, src));
lut->attributes.insert(std::make_pair(ID::src, src));
}
module->remove(cell);
}
}
}
}
} Ice40WrapCarryPass;
PRIVATE_NAMESPACE_END

View file

@ -0,0 +1,15 @@
pattern ice40_wrapcarry
match carry
select carry->type.in(\SB_CARRY)
endmatch
match lut
select lut->type.in(\SB_LUT4)
index <SigSpec> port(lut, \I1) === port(carry, \I0)
index <SigSpec> port(lut, \I2) === port(carry, \I1)
endmatch
code
accept;
endcode

View file

@ -29,3 +29,12 @@ $(eval $(call add_share_file,share/microchip,techlibs/microchip/LSRAM_map.v))
$(eval $(call add_share_file,share/microchip,techlibs/microchip/LSRAM.txt))
$(eval $(call add_share_file,share/microchip,techlibs/microchip/uSRAM_map.v))
$(eval $(call add_share_file,share/microchip,techlibs/microchip/uSRAM.txt))
OBJS += techlibs/microchip/microchip_dsp.o
GENFILES += techlibs/microchip/microchip_dsp_pm.h
GENFILES += techlibs/microchip/microchip_dsp_CREG_pm.h
GENFILES += techlibs/microchip/microchip_dsp_cascade_pm.h
techlibs/microchip/microchip_dsp.o: techlibs/microchip/microchip_dsp_pm.h techlibs/microchip/microchip_dsp_CREG_pm.h techlibs/microchip/microchip_dsp_cascade_pm.h
$(eval $(call add_extra_objs,techlibs/microchip/microchip_dsp_pm.h))
$(eval $(call add_extra_objs,techlibs/microchip/microchip_dsp_CREG_pm.h))
$(eval $(call add_extra_objs,techlibs/microchip/microchip_dsp_cascade_pm.h))

View file

@ -0,0 +1,362 @@
/*
ISC License
Copyright (C) 2024 Microchip Technology Inc. and its subsidiaries
Permission to use, copy, modify, and/or distribute this software for any
purpose with or without fee is hereby granted, provided that the above
copyright notice and this permission notice appear in all copies.
THE SOFTWARE IS PROVIDED "AS IS" AND THE AUTHOR DISCLAIMS ALL WARRANTIES
WITH REGARD TO THIS SOFTWARE INCLUDING ALL IMPLIED WARRANTIES OF
MERCHANTABILITY AND FITNESS. IN NO EVENT SHALL THE AUTHOR BE LIABLE FOR
ANY SPECIAL, DIRECT, INDIRECT, OR CONSEQUENTIAL DAMAGES OR ANY DAMAGES
WHATSOEVER RESULTING FROM LOSS OF USE, DATA OR PROFITS, WHETHER IN AN
ACTION OF CONTRACT, NEGLIGENCE OR OTHER TORTIOUS ACTION, ARISING OUT OF
OR IN CONNECTION WITH THE USE OR PERFORMANCE OF THIS SOFTWARE.
*/
#include "kernel/sigtools.h"
#include "kernel/yosys.h"
#include <deque>
USING_YOSYS_NAMESPACE
PRIVATE_NAMESPACE_BEGIN
#include "techlibs/microchip/microchip_dsp_CREG_pm.h"
#include "techlibs/microchip/microchip_dsp_cascade_pm.h"
#include "techlibs/microchip/microchip_dsp_pm.h"
void microchip_dsp_pack(microchip_dsp_pm &pm)
{
auto &st = pm.st_microchip_dsp_pack;
log("Analysing %s.%s for Microchip MACC_PA packing.\n", log_id(pm.module), log_id(st.dsp));
Cell *cell = st.dsp;
// pack pre-adder
if (st.preAdderStatic) {
SigSpec &pasub = cell->connections_.at(ID(PASUB));
log(" static PASUB preadder %s (%s)\n", log_id(st.preAdderStatic), log_id(st.preAdderStatic->type));
bool D_SIGNED = st.preAdderStatic->getParam(ID::B_SIGNED).as_bool();
bool B_SIGNED = st.preAdderStatic->getParam(ID::A_SIGNED).as_bool();
st.sigB.extend_u0(18, B_SIGNED);
st.sigD.extend_u0(18, D_SIGNED);
if (st.moveBtoA) {
cell->setPort(ID::A, st.sigA); // if pre-adder feeds into A, original sigB will be moved to port A
}
cell->setPort(ID::B, st.sigB);
cell->setPort(ID::D, st.sigD);
// MACC_PA supports both addition and subtraction with the pre-adder.
// Affects the sign of the 'D' port.
if (st.preAdderStatic->type == ID($add))
pasub[0] = State::S0;
else if (st.preAdderStatic->type == ID($sub))
pasub[0] = State::S1;
else
log_assert(!"strange pre-adder type");
pm.autoremove(st.preAdderStatic);
}
// pack post-adder
if (st.postAdderStatic) {
log(" postadder %s (%s)\n", log_id(st.postAdderStatic), log_id(st.postAdderStatic->type));
SigSpec &sub = cell->connections_.at(ID(SUB));
// Post-adder in MACC_PA also supports subtraction
// Determines the sign of the output from the multiplier.
if (st.postAdderStatic->type == ID($add))
sub[0] = State::S0;
else if (st.postAdderStatic->type == ID($sub))
sub[0] = State::S1;
else
log_assert(!"strange post-adder type");
if (st.useFeedBack) {
cell->setPort(ID(CDIN_FDBK_SEL), {State::S0, State::S1});
} else {
st.sigC.extend_u0(48, st.postAdderStatic->getParam(ID::A_SIGNED).as_bool());
cell->setPort(ID::C, st.sigC);
}
pm.autoremove(st.postAdderStatic);
}
// pack registers
if (st.clock != SigBit()) {
cell->setPort(ID::CLK, st.clock);
// function to absorb a register
auto f = [&pm, cell](SigSpec &A, Cell *ff, IdString ceport, IdString rstport, IdString bypass) {
// input/output ports
SigSpec D = ff->getPort(ID::D);
SigSpec Q = pm.sigmap(ff->getPort(ID::Q));
if (!A.empty())
A.replace(Q, D);
if (rstport != IdString()) {
if (ff->type.in(ID($sdff), ID($sdffe))) {
SigSpec srst = ff->getPort(ID::SRST);
bool rstpol_n = !ff->getParam(ID::SRST_POLARITY).as_bool();
// active low sync rst
cell->setPort(rstport, rstpol_n ? srst : pm.module->Not(NEW_ID, srst));
} else if (ff->type.in(ID($adff), ID($adffe))) {
SigSpec arst = ff->getPort(ID::ARST);
bool rstpol_n = !ff->getParam(ID::ARST_POLARITY).as_bool();
// active low async rst
cell->setPort(rstport, rstpol_n ? arst : pm.module->Not(NEW_ID, arst));
} else {
// active low async/sync rst
cell->setPort(rstport, State::S1);
}
}
if (ff->type.in(ID($dffe), ID($sdffe), ID($adffe))) {
SigSpec ce = ff->getPort(ID::EN);
bool cepol = ff->getParam(ID::EN_POLARITY).as_bool();
// enables are all active high
cell->setPort(ceport, cepol ? ce : pm.module->Not(NEW_ID, ce));
} else {
// enables are all active high
cell->setPort(ceport, State::S1);
}
// bypass set to 0
cell->setPort(bypass, State::S0);
for (auto c : Q.chunks()) {
auto it = c.wire->attributes.find(ID::init);
if (it == c.wire->attributes.end())
continue;
for (int i = c.offset; i < c.offset + c.width; i++) {
log_assert(it->second[i] == State::S0 || it->second[i] == State::Sx);
it->second.bits()[i] = State::Sx;
}
}
};
// NOTE: flops are not autoremoved because it is possible that they
// are only partially absorbed into DSP, or have fanouts.
if (st.ffA) {
SigSpec A = cell->getPort(ID::A);
if (st.ffA) {
f(A, st.ffA, ID(A_EN), ID(A_SRST_N), ID(A_BYPASS));
}
pm.add_siguser(A, cell);
cell->setPort(ID::A, A);
}
if (st.ffB) {
SigSpec B = cell->getPort(ID::B);
if (st.ffB) {
f(B, st.ffB, ID(B_EN), ID(B_SRST_N), ID(B_BYPASS));
}
pm.add_siguser(B, cell);
cell->setPort(ID::B, B);
}
if (st.ffD) {
SigSpec D = cell->getPort(ID::D);
if (st.ffD->type.in(ID($adff), ID($adffe))) {
f(D, st.ffD, ID(D_EN), ID(D_ARST_N), ID(D_BYPASS));
} else {
f(D, st.ffD, ID(D_EN), ID(D_SRST_N), ID(D_BYPASS));
}
pm.add_siguser(D, cell);
cell->setPort(ID::D, D);
}
if (st.ffP) {
SigSpec P; // unused
f(P, st.ffP, ID(P_EN), ID(P_SRST_N), ID(P_BYPASS));
st.ffP->connections_.at(ID::Q).replace(st.sigP, pm.module->addWire(NEW_ID, GetSize(st.sigP)));
}
log(" clock: %s (%s)\n", log_signal(st.clock), "posedge");
if (st.ffA)
log(" \t ffA:%s\n", log_id(st.ffA));
if (st.ffB)
log(" \t ffB:%s\n", log_id(st.ffB));
if (st.ffD)
log(" \t ffD:%s\n", log_id(st.ffD));
if (st.ffP)
log(" \t ffP:%s\n", log_id(st.ffP));
}
log("\n");
SigSpec P = st.sigP;
if (GetSize(P) < 48)
P.append(pm.module->addWire(NEW_ID, 48 - GetSize(P)));
cell->setPort(ID::P, P);
pm.blacklist(cell);
}
// For packing cascaded DSPs
void microchip_dsp_packC(microchip_dsp_CREG_pm &pm)
{
auto &st = pm.st_microchip_dsp_packC;
log_debug("Analysing %s.%s for Microchip DSP packing (REG_C).\n", log_id(pm.module), log_id(st.dsp));
log_debug("ffC: %s\n", log_id(st.ffC, "--"));
Cell *cell = st.dsp;
if (st.clock != SigBit()) {
cell->setPort(ID::CLK, st.clock);
// same function as above, used for the last CREG we need to absorb
auto f = [&pm, cell](SigSpec &A, Cell *ff, IdString ceport, IdString rstport, IdString bypass) {
// input/output ports
SigSpec D = ff->getPort(ID::D);
SigSpec Q = pm.sigmap(ff->getPort(ID::Q));
if (!A.empty())
A.replace(Q, D);
if (rstport != IdString()) {
if (ff->type.in(ID($sdff), ID($sdffe))) {
SigSpec srst = ff->getPort(ID::SRST);
bool rstpol_n = !ff->getParam(ID::SRST_POLARITY).as_bool();
// active low sync rst
cell->setPort(rstport, rstpol_n ? srst : pm.module->Not(NEW_ID, srst));
} else if (ff->type.in(ID($adff), ID($adffe))) {
SigSpec arst = ff->getPort(ID::ARST);
bool rstpol_n = !ff->getParam(ID::ARST_POLARITY).as_bool();
// active low async rst
cell->setPort(rstport, rstpol_n ? arst : pm.module->Not(NEW_ID, arst));
} else {
// active low async/sync rst
cell->setPort(rstport, State::S1);
}
}
if (ff->type.in(ID($dffe), ID($sdffe), ID($adffe))) {
SigSpec ce = ff->getPort(ID::EN);
bool cepol = ff->getParam(ID::EN_POLARITY).as_bool();
// enables are all active high
cell->setPort(ceport, cepol ? ce : pm.module->Not(NEW_ID, ce));
} else {
// enables are all active high
cell->setPort(ceport, State::S1);
}
// bypass set to 0
cell->setPort(bypass, State::S0);
for (auto c : Q.chunks()) {
auto it = c.wire->attributes.find(ID::init);
if (it == c.wire->attributes.end())
continue;
for (int i = c.offset; i < c.offset + c.width; i++) {
log_assert(it->second[i] == State::S0 || it->second[i] == State::Sx);
it->second.bits()[i] = State::Sx;
}
}
};
if (st.ffC) {
SigSpec C = cell->getPort(ID::C);
if (st.ffC->type.in(ID($adff), ID($adffe))) {
f(C, st.ffC, ID(C_EN), ID(C_ARST_N), ID(C_BYPASS));
} else {
f(C, st.ffC, ID(C_EN), ID(C_SRST_N), ID(C_BYPASS));
}
pm.add_siguser(C, cell);
cell->setPort(ID::C, C);
}
log(" clock: %s (%s)", log_signal(st.clock), "posedge");
if (st.ffC)
log(" ffC:%s", log_id(st.ffC));
log("\n");
}
pm.blacklist(cell);
}
struct MicrochipDspPass : public Pass {
MicrochipDspPass() : Pass("microchip_dsp", "MICROCHIP: pack resources into DSPs") {}
void help() override
{
// |---v---|---v---|---v---|---v---|---v---|---v---|---v---|---v---|---v---|---v---|
log("\n");
log(" microchip_dsp [options] [selection]\n");
log("\n");
log("Pack input registers 'A', 'B', 'C', and 'D' (with optional enable/reset),\n");
log("output register 'P' (with optional enable/reset), pre-adder and/or post-adder into\n");
log("Microchip DSP resources.\n");
log("\n");
log("Multiply-accumulate operations using the post-adder with feedback on the 'C'\n");
log("input will be folded into the DSP. In this scenario only, the 'C' input can be\n");
log("used to override the current accumulation result with a new value. This will\n");
log("be added to the multiplier result to form the next accumulation result.\n");
log("\n");
log("Use of the dedicated 'PCOUT' -> 'PCIN' cascade path is detected for 'P' -> 'C'\n");
log("connections (optionally, where 'P' is right-shifted by 17-bits and used as an\n");
log("input to the post-adder. This pattern is common for summing partial products to\n");
log("implement wide multipliers). Cascade chains are limited to a mazimum length \n");
log("of 24 cells, corresponding to PolarFire (pf) devices.\n");
log("\n");
log("This pass is a no-op if the scratchpad variable 'microchip_dsp.multonly' is set\n");
log("to 1.\n");
log("\n");
log("\n");
log(" -family {polarfire}\n");
log(" select the family to target\n");
log(" default: polarfire\n");
log("\n");
}
void execute(std::vector<std::string> args, RTLIL::Design *design) override
{
log_header(design, "Executing MICROCHIP_DSP pass (pack resources into DSPs).\n");
std::string family = "polarfire";
size_t argidx;
for (argidx = 1; argidx < args.size(); argidx++) {
if ((args[argidx] == "-family") && argidx + 1 < args.size()) {
family = args[++argidx];
continue;
}
break;
}
extra_args(args, argidx, design);
for (auto module : design->selected_modules()) {
if (design->scratchpad_get_bool("microchip_dsp.multonly"))
continue;
{
// For more details on PolarFire MACC_PA, consult
// the "PolarFire FPGA Macro Library Guide"
// Main pattern matching step to capture a DSP cell.
// Match for pre-adder, post-adder, as well as
// registers 'A', 'B', 'D', and 'P'. Additionally,
// check for an accumulator pattern based on whether
// a post-adder and PREG are both present AND
// if PREG feeds into this post-adder.
microchip_dsp_pm pm(module, module->selected_cells());
pm.run_microchip_dsp_pack(microchip_dsp_pack);
}
// Separating out CREG packing is necessary since there
// is no guarantee that the cell ordering corresponds
// to the "expected" case (i.e. the order in which
// they appear in the source). There existed the possibility
// where a register got packed as a CREG into a
// downstream DSP that should have otherwise been a
// PREG of an upstream DSP that had not been visited
// yet
{
microchip_dsp_CREG_pm pm(module, module->selected_cells());
pm.run_microchip_dsp_packC(microchip_dsp_packC);
}
// Lastly, identify and utilise PCOUT -> PCIN chains
{
microchip_dsp_cascade_pm pm(module, module->selected_cells());
pm.run_microchip_dsp_cascade();
}
}
}
} MicrochipDspPass;
PRIVATE_NAMESPACE_END

View file

@ -0,0 +1,439 @@
// ISC License
//
// Copyright (C) 2024 Microchip Technology Inc. and its subsidiaries
//
// Permission to use, copy, modify, and/or distribute this software for any
// purpose with or without fee is hereby granted, provided that the above
// copyright notice and this permission notice appear in all copies.
//
// THE SOFTWARE IS PROVIDED "AS IS" AND THE AUTHOR DISCLAIMS ALL WARRANTIES
// WITH REGARD TO THIS SOFTWARE INCLUDING ALL IMPLIED WARRANTIES OF
// MERCHANTABILITY AND FITNESS. IN NO EVENT SHALL THE AUTHOR BE LIABLE FOR
// ANY SPECIAL, DIRECT, INDIRECT, OR CONSEQUENTIAL DAMAGES OR ANY DAMAGES
// WHATSOEVER RESULTING FROM LOSS OF USE, DATA OR PROFITS, WHETHER IN AN
// ACTION OF CONTRACT, NEGLIGENCE OR OTHER TORTIOUS ACTION, ARISING OUT OF
// OR IN CONNECTION WITH THE USE OR PERFORMANCE OF THIS SOFTWARE.
// This file describes the main pattern matcher setup (of three total) that
// forms the `microchip_dsp` pass described in microchip_dsp.cc
// At a high level, it works as follows:
// ( 1) Starting from a DSP cell. Capture DSP configurations as states
// ( 2) Match for pre-adder
// ( 3) Match for post-adder
// ( 4) Match register 'A', 'B', 'D', 'P'
// ( 5) If post-adder and PREG both present, check if PREG feeds into post-adder.
// This indicates an accumulator situation like the ASCII diagram below:
// +--------------------------------+
// |_________ |
// | /-------\ +----+ |
// +----+ +-| post- |___|PREG|---+ 'P'
// |MULT|------ | adder | +----+
// +----+ \-------/
pattern microchip_dsp_pack
state <SigBit> clock
state <SigSpec> sigA sigB sigC sigD sigP
state <Cell*> ffA ffB ffD ffP
state <Cell*> preAdderStatic postAdderStatic
state <bool> moveBtoA useFeedBack
// static ports, used to detect dsp configuration
state <SigSpec> bypassA bypassB bypassC bypassD bypassP
state <SigSpec> bypassPASUB
// Variables used for subpatterns
state <SigSpec> argQ argD
udata <bool> allowAsync
udata <SigSpec> dffD dffQ
udata <SigBit> dffclock
udata <Cell*> dff
udata <Cell*> u_preAdderStatic u_postAdderStatic
udata <IdString> u_postAddAB
state <IdString> postAddAB
// (1) Starting from a DSP cell
match dsp
select dsp->type.in(\MACC_PA)
endmatch
// detect existing signals connected to DSP
// detect configuration ports
code sigA sigB sigC sigD clock sigP
//helper function to remove unused bits
auto unextend = [](const SigSpec &sig) {
int i;
for (i = GetSize(sig)-1; i > 0; i--)
if (sig[i] != sig[i-1])
break;
// Do not remove non-const sign bit
if (sig[i].wire)
++i;
return sig.extract(0, i);
};
//unextend to remove unused bits
sigA = unextend(port(dsp, \A));
sigB = unextend(port(dsp, \B));
//update signals
sigC = port(dsp, \C, SigSpec());
sigD = port(dsp, \D, SigSpec());
SigSpec P = port(dsp, \P);
// Only care about bits that are used
int i;
for (i = GetSize(P)-1; i >= 0; i--)
if (nusers(P[i]) > 1)
break;
i++;
log_assert(nusers(P.extract_end(i)) <= 1);
// This sigP could have no users if downstream sinks (e.g. $add) is
// narrower than $mul result, for example
if (i == 0)
reject;
sigP = P.extract(0, i);
clock = port(dsp, \CLK, SigBit());
endcode
// capture static configuration ports
code bypassA bypassB bypassC bypassD bypassPASUB bypassP
bypassA = port(dsp, \A_BYPASS, SigSpec());
bypassB = port(dsp, \B_BYPASS, SigSpec());
bypassC = port(dsp, \C_BYPASS, SigSpec());
bypassD = port(dsp, \D_BYPASS, SigSpec());
bypassPASUB = port(dsp, \PASUB_BYPASS, SigSpec());
bypassP = port(dsp, \P_BYPASS, SigSpec());
endcode
// (2) Match for pre-adder
//
code sigA sigB sigD preAdderStatic moveBtoA
subpattern(preAddMatching);
preAdderStatic = u_preAdderStatic;
moveBtoA = false;
if (preAdderStatic) {
if (port(preAdderStatic, \Y) == sigA)
{
//used for packing
moveBtoA = true;
// sigA should be the input to the multiplier without the preAdd. sigB and sigD should be
//the preAdd inputs. If our "A" input into the multiplier is from the preAdd (not sigA), then
// we basically swap it.
sigA = sigB;
}
// port B of preAdderStatic must be mapped to port D of DSP for subtraction
sigD = port(preAdderStatic, \B);
sigB = port(preAdderStatic, \A);
}
endcode
// (3) Match for post-adder
//
code postAdderStatic sigP sigC
u_postAdderStatic = nullptr;
subpattern(postAddMatching);
postAdderStatic = u_postAdderStatic;
if (postAdderStatic) {
//sigC will be whichever input to the postAdder that is NOT from the multiplier
// u_postAddAB is the input to the postAdder from the multiplier
sigC = port(postAdderStatic, u_postAddAB == \A ? \B : \A);
sigP = port(postAdderStatic, \Y);
}
endcode
// (4) Matching registers
//
// 'A' input for REG_A
code argQ bypassA sigA clock ffA
if (bypassA.is_fully_ones()){
argQ = sigA;
allowAsync = false;
subpattern(in_dffe);
if (dff) {
ffA = dff;
clock = dffclock;
sigA = dffD;
}
}
endcode
// 'B' input for REG_B
code argQ bypassB sigB clock ffB
if (bypassB.is_fully_ones()){
argQ = sigB;
allowAsync = false;
subpattern(in_dffe);
if (dff) {
ffB = dff;
clock = dffclock;
sigB = dffD;
}
}
endcode
// 'D' input for REG_D
code argQ bypassP sigD clock ffD
if (bypassD.is_fully_ones()){
argQ = sigD;
allowAsync = true;
subpattern(in_dffe);
if (dff) {
ffD = dff;
clock = dffclock;
sigD = dffD;
}
}
endcode
// 'P' output for REG_P
code argD ffP sigP clock bypassP
if (bypassP.is_fully_ones() && nusers(sigP) == 2) {
argD = sigP;
allowAsync = false;
subpattern(out_dffe);
if (dff) {
ffP = dff;
clock = dffclock;
sigP = dffQ;
}
}
endcode
// (5) If post-adder and PREG both present, check if PREG feeds into post-adder via port C.
// This indicates an accumulator situation. Port C can be freed
// +--------------------------------+
// |_________ |
// | /-------\ +----+ |
// +----+ +-| post- |___|PREG|---+ 'P'
// |MULT|------ | adder | +----+
// +----+ \-------/
code useFeedBack
useFeedBack = false;
if (postAdderStatic && ffP) {
if (sigC == sigP) {
useFeedBack = true;
}
}
endcode
// if any cells are absorbed, invoke the callback function
code
if (preAdderStatic || postAdderStatic)
accept;
if (ffA || ffB || ffD || ffP)
accept;
endcode
// #######################
// Subpattern for matching against post-adder
// Match 'P' output that exclusively drives one of two inputs to an $add
// cell (post-adder).
// The other input to the adder is assumed to come in from the 'C' input
subpattern postAddMatching
arg sigP
match postAdd
select postAdd->type.in($add, $sub)
select GetSize(port(postAdd, \Y)) <= 48
// AB is the port that connects MUL to ADD
choice <IdString> AB {\A, \B}
select nusers(port(postAdd, AB)) == 2
// has one input coming from multiplier
index <SigBit> port(postAdd, AB)[0] === sigP[0]
filter GetSize(port(postAdd, AB)) >= GetSize(sigP)
filter port(postAdd, AB).extract(0, GetSize(sigP)) == sigP
// Check that remainder of AB is a sign- or zero-extension
filter port(postAdd, AB).extract_end(GetSize(sigP)) == SigSpec(sigP[GetSize(sigP)-1], GetSize(port(postAdd, AB))-GetSize(sigP)) || port(postAdd, AB).extract_end(GetSize(sigP)) == SigSpec(State::S0, GetSize(port(postAdd, AB))-GetSize(sigP))
set postAddAB AB
// optional
endmatch
code
if (postAdd)
{
if (postAdd->type.in(ID($sub)) && postAddAB == \A) {
// if $sub, the multiplier output must match to $sub.B, otherwise no match
} else {
u_postAddAB = postAddAB;
u_postAdderStatic = postAdd;
}
}
endcode
// #######################
// Subpattern for matching against pre-adder
// support static PASUB only
subpattern preAddMatching
arg sigA sigB sigD bypassB bypassD bypassPASUB
code
u_preAdderStatic = nullptr;
// Ensure that preAdder not already used
// Assume we can inspect port D to see if its all zeros.
if (!(sigD.empty() || sigD.is_fully_zero())) reject;
if (!bypassB.is_fully_ones()) reject;
if (!bypassD.is_fully_ones()) reject;
if (!bypassPASUB.is_fully_ones()) reject;
endcode
match preAdd
// can handle add or sub
select preAdd->type.in($add, $sub)
// Output has to be 18 bits or less, and only has single fanout
select GetSize(port(preAdd, \Y)) <= 18
select nusers(port(preAdd, \Y)) == 2
// Adder inputs must be 18 bits or less
select GetSize(port(preAdd, \A)) <= 18
select GetSize(port(preAdd, \B)) <= 18
// Output feeds into one of multiplier input
filter port(preAdd, \Y) == sigB || port(preAdd, \Y) == sigA
// optional
endmatch
code
if (preAdd)
{
u_preAdderStatic = preAdd;
}
endcode
// #######################
// Subpattern for matching against input registers, based on knowledge of the
// 'Q' input.
subpattern in_dffe
arg argQ clock
code
dff = nullptr;
if (argQ.empty())
reject;
for (const auto &c : argQ.chunks()) {
// Abandon matches when 'Q' is a constant
if (!c.wire)
reject;
// Abandon matches when 'Q' has the keep attribute set
if (c.wire->get_bool_attribute(\keep))
reject;
// Abandon matches when 'Q' has a non-zero init attribute set
Const init = c.wire->attributes.at(\init, Const());
if (!init.empty())
for (auto b : init.extract(c.offset, c.width))
if (b != State::Sx && b != State::S0)
reject;
}
endcode
match ff
// reg D has async rst
// reg A, B has sync rst
select ff->type.in($dff, $dffe, $sdff, $sdffe, $adff, $adffe)
// does not support clock inversion
select param(ff, \CLK_POLARITY).as_bool()
// it is possible that only part of a dff output matches argQ
slice offset GetSize(port(ff, \D))
index <SigBit> port(ff, \Q)[offset] === argQ[0]
// Check that the rest of argQ is present
filter GetSize(port(ff, \Q)) >= offset + GetSize(argQ)
filter port(ff, \Q).extract(offset, GetSize(argQ)) == argQ
// only consider async rst flops when flag is set
filter !ff->type.in($adff, $adffe) || allowAsync
// clock must be consistent
filter clock == SigBit() || port(ff, \CLK)[0] == clock
endmatch
code argQ
// Check that reset value, if present, is fully 0.
bool noResetFlop = ff->type.in($dff, $dffe);
bool srstZero = ff->type.in($sdff, $sdffe) && param(ff, \SRST_VALUE).is_fully_zero();
bool arstZero = ff->type.in($adff, $adffe) && param(ff, \ARST_VALUE).is_fully_zero();
bool resetLegal = noResetFlop || srstZero || arstZero;
if (resetLegal)
{
SigSpec Q = port(ff, \Q);
dff = ff;
dffclock = port(ff, \CLK);
dffD = argQ;
SigSpec D = port(ff, \D);
argQ = Q;
dffD.replace(argQ, D);
}
endcode
// #######################
subpattern out_dffe
arg argD argQ clock
code
dff = nullptr;
for (auto c : argD.chunks())
// Abandon matches when 'D' has the keep attribute set
if (c.wire->get_bool_attribute(\keep))
reject;
endcode
match ff
select ff->type.in($dff, $dffe, $sdff, $sdffe)
// does not support clock inversion
select param(ff, \CLK_POLARITY).as_bool()
slice offset GetSize(port(ff, \D))
index <SigBit> port(ff, \D)[offset] === argD[0]
// Check that the rest of argD is present
filter GetSize(port(ff, \D)) >= offset + GetSize(argD)
filter port(ff, \D).extract(offset, GetSize(argD)) == argD
filter clock == SigBit() || port(ff, \CLK)[0] == clock
endmatch
code argQ
SigSpec D = port(ff, \D);
SigSpec Q = port(ff, \Q);
argQ = argD;
argQ.replace(D, Q);
// Abandon matches when 'Q' has a non-zero init attribute set
for (auto c : argQ.chunks()) {
Const init = c.wire->attributes.at(\init, Const());
if (!init.empty())
for (auto b : init.extract(c.offset, c.width))
if (b != State::Sx && b != State::S0)
reject;
}
dff = ff;
dffQ = argQ;
dffclock = port(ff, \CLK);
endcode

View file

@ -0,0 +1,168 @@
// ISC License
//
// Copyright (C) 2024 Microchip Technology Inc. and its subsidiaries
//
// Permission to use, copy, modify, and/or distribute this software for any
// purpose with or without fee is hereby granted, provided that the above
// copyright notice and this permission notice appear in all copies.
//
// THE SOFTWARE IS PROVIDED "AS IS" AND THE AUTHOR DISCLAIMS ALL WARRANTIES
// WITH REGARD TO THIS SOFTWARE INCLUDING ALL IMPLIED WARRANTIES OF
// MERCHANTABILITY AND FITNESS. IN NO EVENT SHALL THE AUTHOR BE LIABLE FOR
// ANY SPECIAL, DIRECT, INDIRECT, OR CONSEQUENTIAL DAMAGES OR ANY DAMAGES
// WHATSOEVER RESULTING FROM LOSS OF USE, DATA OR PROFITS, WHETHER IN AN
// ACTION OF CONTRACT, NEGLIGENCE OR OTHER TORTIOUS ACTION, ARISING OUT OF
// OR IN CONNECTION WITH THE USE OR PERFORMANCE OF THIS SOFTWARE.
// This file describes the second of three pattern matcher setups that
// forms the `microchip_dsp` pass described in microchip_dsp.cc
// At a high level, it works as follows:
// (1) Starting from a DSP cell that (a) doesn't have a CREG already,
// and (b) uses the 'C' port
// (2) Match the driver of the 'C' input to a possible $dff cell (CREG)
// (attached to at most two $mux cells that implement clock-enable or
// reset functionality, using a subpattern discussed below)
// Notes:
// - Running CREG packing after microchip_dsp_pack is necessary since there is no
// guarantee that the cell ordering corresponds to the "expected" case (i.e.
// the order in which they appear in the source) thus the possiblity existed
// where a register got packed as a CREG into a downstream DSP, while it should
// have otherwise been a PREG of an upstream DSP that had not been visited.
// yet.
// - The reason this is separated out from the microchip_dsp.pmg file is
// for efficiency --- each *.pmg file creates a class of the same basename,
// which when constructed, creates a custom database tailored to the
// pattern(s) contained within. Since the pattern in this file must be
// executed after the pattern contained in microchip_dsp.pmg, it is necessary
// to reconstruct this database. Separating the two patterns into
// independent files causes two smaller, more specific, databases.
pattern microchip_dsp_packC
udata <std::function<SigSpec(const SigSpec&)>> unextend
state <SigBit> clock
state <SigSpec> sigC sigP
state <Cell*> ffC
// Variables used for subpatterns
state <SigSpec> argQ argD
state <int> ffoffset
udata <SigSpec> dffD dffQ
udata <SigBit> dffclock
udata <Cell*> dff
// (1) Starting from a DSP cell that (a) doesn't have a CREG already,
// and (b) uses the 'C' port
match dsp
select dsp->type.in(\MACC_PA)
select port(dsp, \C_BYPASS, SigSpec()).is_fully_ones()
select nusers(port(dsp, \C, SigSpec())) > 1
endmatch
code sigC sigP clock
//helper function to remove unused bits
unextend = [](const SigSpec &sig) {
int i;
for (i = GetSize(sig)-1; i > 0; i--)
if (sig[i] != sig[i-1])
break;
// Do not remove non-const sign bit
if (sig[i].wire)
++i;
return sig.extract(0, i);
};
sigC = unextend(port(dsp, \C, SigSpec()));
SigSpec P = port(dsp, \P);
// Only care about those bits that are used
int i;
for (i = GetSize(P)-1; i >= 0; i--)
if (nusers(P[i]) > 1)
break;
i++;
log_assert(nusers(P.extract_end(i)) <= 1);
sigP = P.extract(0, i);
clock = port(dsp, \CLK, SigBit());
endcode
// (2) Match the driver of the 'C' input to a possible $dff cell (CREG)
// (attached to at most two $mux cells that implement clock-enable or
// reset functionality, using the in_dffe subpattern)
code argQ ffC sigC clock
argQ = sigC;
subpattern(in_dffe);
if (dff) {
ffC = dff;
clock = dffclock;
sigC = dffD;
}
endcode
code
if (ffC)
accept;
endcode
// #######################
// Subpattern for matching against input registers, based on knowledge of the
// 'Q' input.
subpattern in_dffe
arg argQ clock
code
dff = nullptr;
if (argQ.empty())
reject;
for (const auto &c : argQ.chunks()) {
// Abandon matches when 'Q' is a constant
if (!c.wire)
reject;
// Abandon matches when 'Q' has the keep attribute set
if (c.wire->get_bool_attribute(\keep))
reject;
// Abandon matches when 'Q' has a non-zero init attribute set
Const init = c.wire->attributes.at(\init, Const());
if (!init.empty())
for (auto b : init.extract(c.offset, c.width))
if (b != State::Sx && b != State::S0)
reject;
}
endcode
match ff
select ff->type.in($dff, $dffe, $sdff, $sdffe, $adff, $adffe)
// does not support clock inversion
select param(ff, \CLK_POLARITY).as_bool()
slice offset GetSize(port(ff, \D))
index <SigBit> port(ff, \Q)[offset] === argQ[0]
// Check that the rest of argQ is present
filter GetSize(port(ff, \Q)) >= offset + GetSize(argQ)
filter port(ff, \Q).extract(offset, GetSize(argQ)) == argQ
filter clock == SigBit() || port(ff, \CLK)[0] == clock
endmatch
code argQ
// Check that reset value, if present, is fully 0.
bool noResetFlop = ff->type.in($dff, $dffe);
bool srstZero = ff->type.in($sdff, $sdffe) && param(ff, \SRST_VALUE).is_fully_zero();
bool arstZero = ff->type.in($adff, $adffe) && param(ff, \ARST_VALUE).is_fully_zero();
bool resetLegal = noResetFlop || srstZero || arstZero;
if (resetLegal)
{
SigSpec Q = port(ff, \Q);
dff = ff;
dffclock = port(ff, \CLK);
dffD = argQ;
SigSpec D = port(ff, \D);
argQ = Q;
dffD.replace(argQ, D);
}
endcode

View file

@ -0,0 +1,236 @@
// ISC License
//
// Copyright (C) 2024 Microchip Technology Inc. and its subsidiaries
//
// Permission to use, copy, modify, and/or distribute this software for any
// purpose with or without fee is hereby granted, provided that the above
// copyright notice and this permission notice appear in all copies.
//
// THE SOFTWARE IS PROVIDED "AS IS" AND THE AUTHOR DISCLAIMS ALL WARRANTIES
// WITH REGARD TO THIS SOFTWARE INCLUDING ALL IMPLIED WARRANTIES OF
// MERCHANTABILITY AND FITNESS. IN NO EVENT SHALL THE AUTHOR BE LIABLE FOR
// ANY SPECIAL, DIRECT, INDIRECT, OR CONSEQUENTIAL DAMAGES OR ANY DAMAGES
// WHATSOEVER RESULTING FROM LOSS OF USE, DATA OR PROFITS, WHETHER IN AN
// ACTION OF CONTRACT, NEGLIGENCE OR OTHER TORTIOUS ACTION, ARISING OUT OF
// OR IN CONNECTION WITH THE USE OR PERFORMANCE OF THIS SOFTWARE.
// This file describes the third of three pattern matcher setups that
// forms the `microchip_dsp` pass described in microchip_dsp.cc
// At a high level, it works as follows:
// (1) Starting from a DSP cell that
// (a) CDIN_FDBK_SEL is set to default "00"
// (b) doesn't already use the 'PCOUT' port
// (2) Match another DSP cell that
// (a) does not have the CREG enabled,
// (b) 'C' port is driven by the 'P' output of the previous DSP cell
// (c) has its 'PCIN' port unused
// (3) Recursively go to (2) until no more matches possible, keeping track
// of the longest possible chain found
// (4) The longest chain is then divided into chunks of no more than
// MAX_DSP_CASCADE in length (to prevent long cascades that exceed the
// height of a DSP column) with each DSP in each chunk being rewritten
// to use [ABP]COUT -> [ABP]CIN cascading as appropriate
pattern microchip_dsp_cascade
udata <std::function<SigSpec(const SigSpec&)>> unextend
udata <vector<std::tuple<Cell*,int>>> chain longest_chain
udata <std::set<Cell*>> visited
state <Cell*> next
state <SigSpec> clock
// Variables used for subpatterns
state <SigSpec> argQ argD
state <int> ffoffset
udata <SigSpec> dffD dffQ
udata <SigBit> dffclock
udata <Cell*> dff
// Maximum of 24 cascaded blocks
code
#define MAX_DSP_CASCADE 24
endcode
// NOTE: Chain vector
// +--------+ +--------+
// | first |----> | next | ----> ...
// +--------+ +--------+
// first.COUT cascades to next.CIN, so on and so forth
// Helper function to remove unused bits
code
unextend = [](const SigSpec &sig) {
int i;
for (i = GetSize(sig)-1; i > 0; i--)
if (sig[i] != sig[i-1])
break;
// Do not remove non-const sign bit
if (sig[i].wire)
++i;
return sig.extract(0, i);
};
endcode
// (1) Starting from a DSP cell that
// (a) CDIN_FDBK_SEL is set to default "00"
// (b) doesn't already use the 'PCOUT' port
match first
select first->type.in(\MACC_PA) && port(first, \CDIN_FDBK_SEL, Const(0, 2)) == Const::from_string("00")
select nusers(port(first, \CDOUT, SigSpec())) <= 1
endmatch
// (4) The longest chain is then divided into chunks of no more than
// MAX_DSP_CASCADE in length (to prevent long cascades that exceed the
// height of a DSP column) with each DSP in each chunk being rewritten
// to use [ABP]COUT -> [ABP]CIN cascading as appropriate
code
visited.clear();
visited.insert(first);
longest_chain.clear();
chain.emplace_back(first, -1);
subpattern(tail);
finally
// longest cascade chain has been found with DSP "first" being the head of the chain
// do some post processing
chain.pop_back();
visited.clear();
log_assert(chain.empty());
if (GetSize(longest_chain) > 1) {
Cell *dsp = std::get<0>(longest_chain.front());
Cell *dsp_pcin;
int SHIFT = -1;
for (int i = 1; i < GetSize(longest_chain); i++) {
log_assert(dsp->type.in(\MACC_PA));
std::tie(dsp_pcin,SHIFT) = longest_chain[i];
// Chain length exceeds the maximum cascade length, must split it up
if (i % MAX_DSP_CASCADE > 0) {
Wire *cascade = module->addWire(NEW_ID, 48);
// zero port C and move wire to cascade
dsp_pcin->setPort(ID(C), Const(0, 48));
dsp_pcin->setPort(ID(CDIN), cascade);
dsp->setPort(ID(CDOUT), cascade);
// Configure wire to cascade the dsps
add_siguser(cascade, dsp_pcin);
add_siguser(cascade, dsp);
// configure mux to use cascade for signal E
SigSpec cdin_fdbk_sel = port(dsp_pcin, \CDIN_FDBK_SEL, Const(0, 2));
cdin_fdbk_sel[1] = State::S1;
dsp_pcin->setPort(\CDIN_FDBK_SEL, cdin_fdbk_sel);
// check if shifting is required for wide multiplier implmentation
if (SHIFT == 17)
{
dsp_pcin->setPort(\ARSHFT17, State::S1);
}
log_debug("PCOUT -> PCIN cascade for %s -> %s\n", log_id(dsp), log_id(dsp_pcin));
} else {
log_debug(" Blocking %s -> %s cascade (exceeds max: %d)\n", log_id(dsp), log_id(dsp_pcin), MAX_DSP_CASCADE);
}
dsp = dsp_pcin;
}
accept;
}
endcode
// ------------------------------------------------------------------
subpattern tail
arg first
arg next
// (2) Match another DSP cell that
// (a) does not have the CREG enabled,
// (b) 'C' port is driven by the 'P' output of the previous DSP cell
// (c) has its 'PCIN' port unused
match nextP
// find candidates where nextP.C port is driven (maybe partially) by chain's tail DSP.P port
// and with no registers in between (since cascade path cannot be pipelined)
// reg C must not be used
select port(nextP, \C_BYPASS, SigSpec()).is_fully_ones()
// must be same DSP type
select nextP->type.in(\MACC_PA)
// port C should be driven by something
select nusers(port(nextP, \C, SigSpec())) > 1
// CIN must be unused
select nusers(port(nextP, \PCIN, SigSpec())) == 0
// should not have internal feedback connection
select port(nextP, \CDIN_FDBK_SEL, SigSpec()).is_fully_zero()
// SHIFT should be unused
select port(nextP, \ARSHFT17_BYPASS).is_fully_ones()
select port(nextP, \ARSHFT17).is_fully_zero()
select nusers(port(nextP, \ARSHFT17, SigSpec())) == 0
// current DSP cell can be cascaded with the back of the cascade chain
// index <SigBit> port(nextP, \C)[0] === port(std::get<0>(chain.back()), \P)[0] || port(nextP, \C)[0] === port(std::get<0>(chain.back()), \P)[17]
filter port(nextP, \C)[0] == port(std::get<0>(chain.back()), \P)[0] || port(nextP, \C)[0] == port(std::get<0>(chain.back()), \P)[17]
// semioptional
optional
endmatch
code next
next = nextP;
// keep DSP type consistent in the chain
// currently since we only have one type anyways, this line is always false
if (next && next->type != first->type) reject;
// break infinite recursion when there's a combinational loop
if (visited.count(next) > 0) reject;
endcode
// (3) Recursively go to (2) until no more matches possible, recording the
// longest possible chain
code
if (next) {
SigSpec driver_sigP = port(std::get<0>(chain.back()), \P);
int shift = 0;
if (port(next, \C)[0] == port(std::get<0>(chain.back()), \P)[17]) shift = 17;
chain.emplace_back(next, shift);
visited.insert(next);
SigSpec sigC = unextend(port(next, \C));
// Make sure driverDSP.P === DSP.C
if (GetSize(sigC) + shift <= GetSize(driver_sigP) && driver_sigP.extract(shift, GetSize(sigC)) == sigC)
{
subpattern(tail);
}
} else {
if (GetSize(chain) > GetSize(longest_chain))
longest_chain = chain;
}
finally
if (next)
{
visited.erase(next);
chain.pop_back();
}
endcode

View file

@ -46,3 +46,19 @@ $(eval $(call add_share_file,share/xilinx,techlibs/xilinx/xc7_dsp_map.v))
$(eval $(call add_share_file,share/xilinx,techlibs/xilinx/xcu_dsp_map.v))
$(eval $(call add_share_file,share/xilinx,techlibs/xilinx/abc9_model.v))
OBJS += techlibs/xilinx/xilinx_dsp.o
GENFILES += techlibs/xilinx/xilinx_dsp_pm.h
GENFILES += techlibs/xilinx/xilinx_dsp48a_pm.h
GENFILES += techlibs/xilinx/xilinx_dsp_CREG_pm.h
GENFILES += techlibs/xilinx/xilinx_dsp_cascade_pm.h
techlibs/xilinx/xilinx_dsp.o: techlibs/xilinx/xilinx_dsp_pm.h techlibs/xilinx/xilinx_dsp48a_pm.h techlibs/xilinx/xilinx_dsp_CREG_pm.h techlibs/xilinx/xilinx_dsp_cascade_pm.h
$(eval $(call add_extra_objs,techlibs/xilinx/xilinx_dsp_pm.h))
$(eval $(call add_extra_objs,techlibs/xilinx/xilinx_dsp48a_pm.h))
$(eval $(call add_extra_objs,techlibs/xilinx/xilinx_dsp_CREG_pm.h))
$(eval $(call add_extra_objs,techlibs/xilinx/xilinx_dsp_cascade_pm.h))
OBJS += techlibs/xilinx/xilinx_srl.o
GENFILES += techlibs/xilinx/xilinx_srl_pm.h
techlibs/xilinx/xilinx_srl.o: techlibs/xilinx/xilinx_srl_pm.h
$(eval $(call add_extra_objs,techlibs/xilinx/xilinx_srl_pm.h))

View file

@ -0,0 +1,836 @@
/*
* yosys -- Yosys Open SYnthesis Suite
*
* Copyright (C) 2012 Claire Xenia Wolf <claire@yosyshq.com>
* 2019 Eddie Hung <eddie@fpgeh.com>
*
* Permission to use, copy, modify, and/or distribute this software for any
* purpose with or without fee is hereby granted, provided that the above
* copyright notice and this permission notice appear in all copies.
*
* THE SOFTWARE IS PROVIDED "AS IS" AND THE AUTHOR DISCLAIMS ALL WARRANTIES
* WITH REGARD TO THIS SOFTWARE INCLUDING ALL IMPLIED WARRANTIES OF
* MERCHANTABILITY AND FITNESS. IN NO EVENT SHALL THE AUTHOR BE LIABLE FOR
* ANY SPECIAL, DIRECT, INDIRECT, OR CONSEQUENTIAL DAMAGES OR ANY DAMAGES
* WHATSOEVER RESULTING FROM LOSS OF USE, DATA OR PROFITS, WHETHER IN AN
* ACTION OF CONTRACT, NEGLIGENCE OR OTHER TORTIOUS ACTION, ARISING OUT OF
* OR IN CONNECTION WITH THE USE OR PERFORMANCE OF THIS SOFTWARE.
*
*/
#include "kernel/yosys.h"
#include "kernel/sigtools.h"
#include <deque>
USING_YOSYS_NAMESPACE
PRIVATE_NAMESPACE_BEGIN
#include "techlibs/xilinx/xilinx_dsp_pm.h"
#include "techlibs/xilinx/xilinx_dsp48a_pm.h"
#include "techlibs/xilinx/xilinx_dsp_CREG_pm.h"
#include "techlibs/xilinx/xilinx_dsp_cascade_pm.h"
static Cell* addDsp(Module *module) {
Cell *cell = module->addCell(NEW_ID, ID(DSP48E1));
cell->setParam(ID(ACASCREG), 0);
cell->setParam(ID(ADREG), 0);
cell->setParam(ID(A_INPUT), Const("DIRECT"));
cell->setParam(ID(ALUMODEREG), 0);
cell->setParam(ID(AREG), 0);
cell->setParam(ID(BCASCREG), 0);
cell->setParam(ID(B_INPUT), Const("DIRECT"));
cell->setParam(ID(BREG), 0);
cell->setParam(ID(CARRYINREG), 0);
cell->setParam(ID(CARRYINSELREG), 0);
cell->setParam(ID(CREG), 0);
cell->setParam(ID(DREG), 0);
cell->setParam(ID(INMODEREG), 0);
cell->setParam(ID(MREG), 0);
cell->setParam(ID(OPMODEREG), 0);
cell->setParam(ID(PREG), 0);
cell->setParam(ID(USE_MULT), Const("NONE"));
cell->setParam(ID(USE_SIMD), Const("ONE48"));
cell->setParam(ID(USE_DPORT), Const("FALSE"));
cell->setPort(ID::D, Const(0, 25));
cell->setPort(ID(INMODE), Const(0, 5));
cell->setPort(ID(ALUMODE), Const(0, 4));
cell->setPort(ID(OPMODE), Const(0, 7));
cell->setPort(ID(CARRYINSEL), Const(0, 3));
cell->setPort(ID(ACIN), Const(0, 30));
cell->setPort(ID(BCIN), Const(0, 18));
cell->setPort(ID(PCIN), Const(0, 48));
cell->setPort(ID(CARRYIN), Const(0, 1));
return cell;
}
void xilinx_simd_pack(Module *module, const std::vector<Cell*> &selected_cells)
{
std::deque<Cell*> simd12_add, simd12_sub;
std::deque<Cell*> simd24_add, simd24_sub;
for (auto cell : selected_cells) {
if (!cell->type.in(ID($add), ID($sub)))
continue;
SigSpec Y = cell->getPort(ID::Y);
if (!Y.is_chunk())
continue;
if (!Y.as_chunk().wire->get_strpool_attribute(ID(use_dsp)).count("simd"))
continue;
if (GetSize(Y) > 25)
continue;
SigSpec A = cell->getPort(ID::A);
SigSpec B = cell->getPort(ID::B);
if (GetSize(Y) <= 13) {
if (GetSize(A) > 12)
continue;
if (GetSize(B) > 12)
continue;
if (cell->type == ID($add))
simd12_add.push_back(cell);
else if (cell->type == ID($sub))
simd12_sub.push_back(cell);
}
else if (GetSize(Y) <= 25) {
if (GetSize(A) > 24)
continue;
if (GetSize(B) > 24)
continue;
if (cell->type == ID($add))
simd24_add.push_back(cell);
else if (cell->type == ID($sub))
simd24_sub.push_back(cell);
}
else
log_abort();
}
auto f12 = [module](SigSpec &AB, SigSpec &C, SigSpec &P, SigSpec &CARRYOUT, Cell *lane) {
SigSpec A = lane->getPort(ID::A);
SigSpec B = lane->getPort(ID::B);
SigSpec Y = lane->getPort(ID::Y);
A.extend_u0(12, lane->getParam(ID::A_SIGNED).as_bool());
B.extend_u0(12, lane->getParam(ID::B_SIGNED).as_bool());
AB.append(A);
C.append(B);
if (GetSize(Y) < 13)
Y.append(module->addWire(NEW_ID, 13-GetSize(Y)));
else
log_assert(GetSize(Y) == 13);
P.append(Y.extract(0, 12));
CARRYOUT.append(Y[12]);
};
auto g12 = [&f12,module](std::deque<Cell*> &simd12) {
while (simd12.size() > 1) {
SigSpec AB, C, P, CARRYOUT;
Cell *lane1 = simd12.front();
simd12.pop_front();
Cell *lane2 = simd12.front();
simd12.pop_front();
Cell *lane3 = nullptr;
Cell *lane4 = nullptr;
if (!simd12.empty()) {
lane3 = simd12.front();
simd12.pop_front();
if (!simd12.empty()) {
lane4 = simd12.front();
simd12.pop_front();
}
}
log("Analysing %s.%s for Xilinx DSP SIMD12 packing.\n", log_id(module), log_id(lane1));
Cell *cell = addDsp(module);
cell->setParam(ID(USE_SIMD), Const("FOUR12"));
// X = A:B
// Y = 0
// Z = C
cell->setPort(ID(OPMODE), Const::from_string("0110011"));
log_assert(lane1);
log_assert(lane2);
f12(AB, C, P, CARRYOUT, lane1);
f12(AB, C, P, CARRYOUT, lane2);
if (lane3) {
f12(AB, C, P, CARRYOUT, lane3);
if (lane4)
f12(AB, C, P, CARRYOUT, lane4);
else {
AB.append(Const(0, 12));
C.append(Const(0, 12));
P.append(module->addWire(NEW_ID, 12));
CARRYOUT.append(module->addWire(NEW_ID, 1));
}
}
else {
AB.append(Const(0, 24));
C.append(Const(0, 24));
P.append(module->addWire(NEW_ID, 24));
CARRYOUT.append(module->addWire(NEW_ID, 2));
}
log_assert(GetSize(AB) == 48);
log_assert(GetSize(C) == 48);
log_assert(GetSize(P) == 48);
log_assert(GetSize(CARRYOUT) == 4);
cell->setPort(ID::A, AB.extract(18, 30));
cell->setPort(ID::B, AB.extract(0, 18));
cell->setPort(ID::C, C);
cell->setPort(ID::P, P);
cell->setPort(ID(CARRYOUT), CARRYOUT);
if (lane1->type == ID($sub))
cell->setPort(ID(ALUMODE), Const::from_string("0011"));
module->remove(lane1);
module->remove(lane2);
if (lane3) module->remove(lane3);
if (lane4) module->remove(lane4);
module->design->select(module, cell);
}
};
g12(simd12_add);
g12(simd12_sub);
auto f24 = [module](SigSpec &AB, SigSpec &C, SigSpec &P, SigSpec &CARRYOUT, Cell *lane) {
SigSpec A = lane->getPort(ID::A);
SigSpec B = lane->getPort(ID::B);
SigSpec Y = lane->getPort(ID::Y);
A.extend_u0(24, lane->getParam(ID::A_SIGNED).as_bool());
B.extend_u0(24, lane->getParam(ID::B_SIGNED).as_bool());
C.append(A);
AB.append(B);
if (GetSize(Y) < 25)
Y.append(module->addWire(NEW_ID, 25-GetSize(Y)));
else
log_assert(GetSize(Y) == 25);
P.append(Y.extract(0, 24));
CARRYOUT.append(module->addWire(NEW_ID)); // TWO24 uses every other bit
CARRYOUT.append(Y[24]);
};
auto g24 = [&f24,module](std::deque<Cell*> &simd24) {
while (simd24.size() > 1) {
SigSpec AB;
SigSpec C;
SigSpec P;
SigSpec CARRYOUT;
Cell *lane1 = simd24.front();
simd24.pop_front();
Cell *lane2 = simd24.front();
simd24.pop_front();
log("Analysing %s.%s for Xilinx DSP SIMD24 packing.\n", log_id(module), log_id(lane1));
Cell *cell = addDsp(module);
cell->setParam(ID(USE_SIMD), Const("TWO24"));
// X = A:B
// Y = 0
// Z = C
cell->setPort(ID(OPMODE), Const::from_string("0110011"));
log_assert(lane1);
log_assert(lane2);
f24(AB, C, P, CARRYOUT, lane1);
f24(AB, C, P, CARRYOUT, lane2);
log_assert(GetSize(AB) == 48);
log_assert(GetSize(C) == 48);
log_assert(GetSize(P) == 48);
log_assert(GetSize(CARRYOUT) == 4);
cell->setPort(ID::A, AB.extract(18, 30));
cell->setPort(ID::B, AB.extract(0, 18));
cell->setPort(ID::C, C);
cell->setPort(ID::P, P);
cell->setPort(ID(CARRYOUT), CARRYOUT);
if (lane1->type == ID($sub))
cell->setPort(ID(ALUMODE), Const::from_string("0011"));
module->remove(lane1);
module->remove(lane2);
module->design->select(module, cell);
}
};
g24(simd24_add);
g24(simd24_sub);
}
void xilinx_dsp_pack(xilinx_dsp_pm &pm)
{
auto &st = pm.st_xilinx_dsp_pack;
log("Analysing %s.%s for Xilinx DSP packing.\n", log_id(pm.module), log_id(st.dsp));
log_debug("preAdd: %s\n", log_id(st.preAdd, "--"));
log_debug("ffAD: %s\n", log_id(st.ffAD, "--"));
log_debug("ffA2: %s\n", log_id(st.ffA2, "--"));
log_debug("ffA1: %s\n", log_id(st.ffA1, "--"));
log_debug("ffB2: %s\n", log_id(st.ffB2, "--"));
log_debug("ffB1: %s\n", log_id(st.ffB1, "--"));
log_debug("ffD: %s\n", log_id(st.ffD, "--"));
log_debug("dsp: %s\n", log_id(st.dsp, "--"));
log_debug("ffM: %s\n", log_id(st.ffM, "--"));
log_debug("postAdd: %s\n", log_id(st.postAdd, "--"));
log_debug("postAddMux: %s\n", log_id(st.postAddMux, "--"));
log_debug("ffP: %s\n", log_id(st.ffP, "--"));
log_debug("overflow: %s\n", log_id(st.overflow, "--"));
Cell *cell = st.dsp;
if (st.preAdd) {
log(" preadder %s (%s)\n", log_id(st.preAdd), log_id(st.preAdd->type));
bool A_SIGNED = st.preAdd->getParam(ID::A_SIGNED).as_bool();
bool D_SIGNED = st.preAdd->getParam(ID::B_SIGNED).as_bool();
if (st.sigA == st.preAdd->getPort(ID::B))
std::swap(A_SIGNED, D_SIGNED);
st.sigA.extend_u0(30, A_SIGNED);
st.sigD.extend_u0(25, D_SIGNED);
cell->setPort(ID::A, st.sigA);
cell->setPort(ID::D, st.sigD);
cell->setPort(ID(INMODE), Const::from_string("00100"));
if (st.ffAD) {
if (st.ffAD->type.in(ID($dffe), ID($sdffe))) {
bool pol = st.ffAD->getParam(ID::EN_POLARITY).as_bool();
SigSpec S = st.ffAD->getPort(ID::EN);
cell->setPort(ID(CEAD), pol ? S : pm.module->Not(NEW_ID, S));
}
else
cell->setPort(ID(CEAD), State::S1);
cell->setParam(ID(ADREG), 1);
}
cell->setParam(ID(USE_DPORT), Const("TRUE"));
pm.autoremove(st.preAdd);
}
if (st.postAdd) {
log(" postadder %s (%s)\n", log_id(st.postAdd), log_id(st.postAdd->type));
SigSpec &opmode = cell->connections_.at(ID(OPMODE));
if (st.postAddMux) {
log_assert(st.ffP);
opmode[4] = st.postAddMux->getPort(ID::S);
pm.autoremove(st.postAddMux);
}
else if (st.ffP && st.sigC == st.sigP)
opmode[4] = State::S0;
else
opmode[4] = State::S1;
opmode[6] = State::S0;
opmode[5] = State::S1;
if (opmode[4] != State::S0) {
if (st.postAddMuxAB == ID::A)
st.sigC.extend_u0(48, st.postAdd->getParam(ID::B_SIGNED).as_bool());
else
st.sigC.extend_u0(48, st.postAdd->getParam(ID::A_SIGNED).as_bool());
cell->setPort(ID::C, st.sigC);
}
pm.autoremove(st.postAdd);
}
if (st.overflow) {
log(" overflow %s (%s)\n", log_id(st.overflow), log_id(st.overflow->type));
cell->setParam(ID(USE_PATTERN_DETECT), Const("PATDET"));
cell->setParam(ID(SEL_PATTERN), Const("PATTERN"));
cell->setParam(ID(SEL_MASK), Const("MASK"));
if (st.overflow->type == ID($ge)) {
Const B = st.overflow->getPort(ID::B).as_const();
log_assert(std::count(B.begin(), B.end(), State::S1) == 1);
// Since B is an exact power of 2, subtract 1
// by inverting all bits up until hitting
// that one hi bit
for (auto &b : B.bits())
if (b == State::S0) b = State::S1;
else if (b == State::S1) {
b = State::S0;
break;
}
B.extu(48);
cell->setParam(ID(MASK), B);
cell->setParam(ID(PATTERN), Const(0, 48));
cell->setPort(ID(OVERFLOW), st.overflow->getPort(ID::Y));
}
else log_abort();
pm.autoremove(st.overflow);
}
if (st.clock != SigBit())
{
cell->setPort(ID::CLK, st.clock);
auto f = [&pm,cell](SigSpec &A, Cell* ff, IdString ceport, IdString rstport) {
SigSpec D = ff->getPort(ID::D);
SigSpec Q = pm.sigmap(ff->getPort(ID::Q));
if (!A.empty())
A.replace(Q, D);
if (rstport != IdString()) {
if (ff->type.in(ID($sdff), ID($sdffe))) {
SigSpec srst = ff->getPort(ID::SRST);
bool rstpol = ff->getParam(ID::SRST_POLARITY).as_bool();
cell->setPort(rstport, rstpol ? srst : pm.module->Not(NEW_ID, srst));
} else {
cell->setPort(rstport, State::S0);
}
}
if (ff->type.in(ID($dffe), ID($sdffe))) {
SigSpec ce = ff->getPort(ID::EN);
bool cepol = ff->getParam(ID::EN_POLARITY).as_bool();
cell->setPort(ceport, cepol ? ce : pm.module->Not(NEW_ID, ce));
}
else
cell->setPort(ceport, State::S1);
for (auto c : Q.chunks()) {
auto it = c.wire->attributes.find(ID::init);
if (it == c.wire->attributes.end())
continue;
for (int i = c.offset; i < c.offset+c.width; i++) {
log_assert(it->second[i] == State::S0 || it->second[i] == State::Sx);
it->second.bits()[i] = State::Sx;
}
}
};
if (st.ffA2) {
SigSpec A = cell->getPort(ID::A);
f(A, st.ffA2, ID(CEA2), ID(RSTA));
if (st.ffA1) {
f(A, st.ffA1, ID(CEA1), IdString());
cell->setParam(ID(AREG), 2);
cell->setParam(ID(ACASCREG), 2);
}
else {
cell->setParam(ID(AREG), 1);
cell->setParam(ID(ACASCREG), 1);
}
pm.add_siguser(A, cell);
cell->setPort(ID::A, A);
}
if (st.ffB2) {
SigSpec B = cell->getPort(ID::B);
f(B, st.ffB2, ID(CEB2), ID(RSTB));
if (st.ffB1) {
f(B, st.ffB1, ID(CEB1), IdString());
cell->setParam(ID(BREG), 2);
cell->setParam(ID(BCASCREG), 2);
}
else {
cell->setParam(ID(BREG), 1);
cell->setParam(ID(BCASCREG), 1);
}
pm.add_siguser(B, cell);
cell->setPort(ID::B, B);
}
if (st.ffD) {
SigSpec D = cell->getPort(ID::D);
f(D, st.ffD, ID(CED), ID(RSTD));
pm.add_siguser(D, cell);
cell->setPort(ID::D, D);
cell->setParam(ID(DREG), 1);
}
if (st.ffM) {
SigSpec M; // unused
f(M, st.ffM, ID(CEM), ID(RSTM));
st.ffM->connections_.at(ID::Q).replace(st.sigM, pm.module->addWire(NEW_ID, GetSize(st.sigM)));
cell->setParam(ID(MREG), State::S1);
}
if (st.ffP) {
SigSpec P; // unused
f(P, st.ffP, ID(CEP), ID(RSTP));
st.ffP->connections_.at(ID::Q).replace(st.sigP, pm.module->addWire(NEW_ID, GetSize(st.sigP)));
cell->setParam(ID(PREG), State::S1);
}
log(" clock: %s (%s)", log_signal(st.clock), "posedge");
if (st.ffA2) {
log(" ffA2:%s", log_id(st.ffA2));
if (st.ffA1)
log(" ffA1:%s", log_id(st.ffA1));
}
if (st.ffAD)
log(" ffAD:%s", log_id(st.ffAD));
if (st.ffB2) {
log(" ffB2:%s", log_id(st.ffB2));
if (st.ffB1)
log(" ffB1:%s", log_id(st.ffB1));
}
if (st.ffD)
log(" ffD:%s", log_id(st.ffD));
if (st.ffM)
log(" ffM:%s", log_id(st.ffM));
if (st.ffP)
log(" ffP:%s", log_id(st.ffP));
}
log("\n");
SigSpec P = st.sigP;
if (GetSize(P) < 48)
P.append(pm.module->addWire(NEW_ID, 48-GetSize(P)));
cell->setPort(ID::P, P);
pm.blacklist(cell);
}
void xilinx_dsp48a_pack(xilinx_dsp48a_pm &pm)
{
auto &st = pm.st_xilinx_dsp48a_pack;
log("Analysing %s.%s for Xilinx DSP48A/DSP48A1 packing.\n", log_id(pm.module), log_id(st.dsp));
log_debug("preAdd: %s\n", log_id(st.preAdd, "--"));
log_debug("ffA1: %s\n", log_id(st.ffA1, "--"));
log_debug("ffA0: %s\n", log_id(st.ffA0, "--"));
log_debug("ffB1: %s\n", log_id(st.ffB1, "--"));
log_debug("ffB0: %s\n", log_id(st.ffB0, "--"));
log_debug("ffD: %s\n", log_id(st.ffD, "--"));
log_debug("dsp: %s\n", log_id(st.dsp, "--"));
log_debug("ffM: %s\n", log_id(st.ffM, "--"));
log_debug("postAdd: %s\n", log_id(st.postAdd, "--"));
log_debug("postAddMux: %s\n", log_id(st.postAddMux, "--"));
log_debug("ffP: %s\n", log_id(st.ffP, "--"));
Cell *cell = st.dsp;
SigSpec &opmode = cell->connections_.at(ID(OPMODE));
if (st.preAdd) {
log(" preadder %s (%s)\n", log_id(st.preAdd), log_id(st.preAdd->type));
bool D_SIGNED = st.preAdd->getParam(ID::A_SIGNED).as_bool();
bool B_SIGNED = st.preAdd->getParam(ID::B_SIGNED).as_bool();
st.sigB.extend_u0(18, B_SIGNED);
st.sigD.extend_u0(18, D_SIGNED);
cell->setPort(ID::B, st.sigB);
cell->setPort(ID::D, st.sigD);
opmode[4] = State::S1;
if (st.preAdd->type == ID($add))
opmode[6] = State::S0;
else if (st.preAdd->type == ID($sub))
opmode[6] = State::S1;
else
log_assert(!"strange pre-adder type");
pm.autoremove(st.preAdd);
}
if (st.postAdd) {
log(" postadder %s (%s)\n", log_id(st.postAdd), log_id(st.postAdd->type));
if (st.postAddMux) {
log_assert(st.ffP);
opmode[2] = st.postAddMux->getPort(ID::S);
pm.autoremove(st.postAddMux);
}
else if (st.ffP && st.sigC == st.sigP)
opmode[2] = State::S0;
else
opmode[2] = State::S1;
opmode[3] = State::S1;
if (opmode[2] != State::S0) {
if (st.postAddMuxAB == ID::A)
st.sigC.extend_u0(48, st.postAdd->getParam(ID::B_SIGNED).as_bool());
else
st.sigC.extend_u0(48, st.postAdd->getParam(ID::A_SIGNED).as_bool());
cell->setPort(ID::C, st.sigC);
}
pm.autoremove(st.postAdd);
}
if (st.clock != SigBit())
{
cell->setPort(ID::CLK, st.clock);
auto f = [&pm,cell](SigSpec &A, Cell* ff, IdString ceport, IdString rstport) {
SigSpec D = ff->getPort(ID::D);
SigSpec Q = pm.sigmap(ff->getPort(ID::Q));
if (!A.empty())
A.replace(Q, D);
if (rstport != IdString()) {
if (ff->type.in(ID($sdff), ID($sdffe))) {
SigSpec srst = ff->getPort(ID::SRST);
bool rstpol = ff->getParam(ID::SRST_POLARITY).as_bool();
cell->setPort(rstport, rstpol ? srst : pm.module->Not(NEW_ID, srst));
} else {
cell->setPort(rstport, State::S0);
}
}
if (ff->type.in(ID($dffe), ID($sdffe))) {
SigSpec ce = ff->getPort(ID::EN);
bool cepol = ff->getParam(ID::EN_POLARITY).as_bool();
cell->setPort(ceport, cepol ? ce : pm.module->Not(NEW_ID, ce));
}
else
cell->setPort(ceport, State::S1);
for (auto c : Q.chunks()) {
auto it = c.wire->attributes.find(ID::init);
if (it == c.wire->attributes.end())
continue;
for (int i = c.offset; i < c.offset+c.width; i++) {
log_assert(it->second[i] == State::S0 || it->second[i] == State::Sx);
it->second.bits()[i] = State::Sx;
}
}
};
if (st.ffA0 || st.ffA1) {
SigSpec A = cell->getPort(ID::A);
if (st.ffA1) {
f(A, st.ffA1, ID(CEA), ID(RSTA));
cell->setParam(ID(A1REG), 1);
}
if (st.ffA0) {
f(A, st.ffA0, ID(CEA), ID(RSTA));
cell->setParam(ID(A0REG), 1);
}
pm.add_siguser(A, cell);
cell->setPort(ID::A, A);
}
if (st.ffB0 || st.ffB1) {
SigSpec B = cell->getPort(ID::B);
if (st.ffB1) {
f(B, st.ffB1, ID(CEB), ID(RSTB));
cell->setParam(ID(B1REG), 1);
}
if (st.ffB0) {
f(B, st.ffB0, ID(CEB), ID(RSTB));
cell->setParam(ID(B0REG), 1);
}
pm.add_siguser(B, cell);
cell->setPort(ID::B, B);
}
if (st.ffD) {
SigSpec D = cell->getPort(ID::D);
f(D, st.ffD, ID(CED), ID(RSTD));
pm.add_siguser(D, cell);
cell->setPort(ID::D, D);
cell->setParam(ID(DREG), 1);
}
if (st.ffM) {
SigSpec M; // unused
f(M, st.ffM, ID(CEM), ID(RSTM));
st.ffM->connections_.at(ID::Q).replace(st.sigM, pm.module->addWire(NEW_ID, GetSize(st.sigM)));
cell->setParam(ID(MREG), State::S1);
}
if (st.ffP) {
SigSpec P; // unused
f(P, st.ffP, ID(CEP), ID(RSTP));
st.ffP->connections_.at(ID::Q).replace(st.sigP, pm.module->addWire(NEW_ID, GetSize(st.sigP)));
cell->setParam(ID(PREG), State::S1);
}
log(" clock: %s (%s)", log_signal(st.clock), "posedge");
if (st.ffA0)
log(" ffA0:%s", log_id(st.ffA0));
if (st.ffA1)
log(" ffA1:%s", log_id(st.ffA1));
if (st.ffB0)
log(" ffB0:%s", log_id(st.ffB0));
if (st.ffB1)
log(" ffB1:%s", log_id(st.ffB1));
if (st.ffD)
log(" ffD:%s", log_id(st.ffD));
if (st.ffM)
log(" ffM:%s", log_id(st.ffM));
if (st.ffP)
log(" ffP:%s", log_id(st.ffP));
}
log("\n");
SigSpec P = st.sigP;
if (GetSize(P) < 48)
P.append(pm.module->addWire(NEW_ID, 48-GetSize(P)));
cell->setPort(ID::P, P);
pm.blacklist(cell);
}
void xilinx_dsp_packC(xilinx_dsp_CREG_pm &pm)
{
auto &st = pm.st_xilinx_dsp_packC;
log_debug("Analysing %s.%s for Xilinx DSP packing (CREG).\n", log_id(pm.module), log_id(st.dsp));
log_debug("ffC: %s\n", log_id(st.ffC, "--"));
Cell *cell = st.dsp;
if (st.clock != SigBit())
{
cell->setPort(ID::CLK, st.clock);
auto f = [&pm,cell](SigSpec &A, Cell* ff, IdString ceport, IdString rstport) {
SigSpec D = ff->getPort(ID::D);
SigSpec Q = pm.sigmap(ff->getPort(ID::Q));
if (!A.empty())
A.replace(Q, D);
if (rstport != IdString()) {
if (ff->type.in(ID($sdff), ID($sdffe))) {
SigSpec srst = ff->getPort(ID::SRST);
bool rstpol = ff->getParam(ID::SRST_POLARITY).as_bool();
cell->setPort(rstport, rstpol ? srst : pm.module->Not(NEW_ID, srst));
} else {
cell->setPort(rstport, State::S0);
}
}
if (ff->type.in(ID($dffe), ID($sdffe))) {
SigSpec ce = ff->getPort(ID::EN);
bool cepol = ff->getParam(ID::EN_POLARITY).as_bool();
cell->setPort(ceport, cepol ? ce : pm.module->Not(NEW_ID, ce));
}
else
cell->setPort(ceport, State::S1);
for (auto c : Q.chunks()) {
auto it = c.wire->attributes.find(ID::init);
if (it == c.wire->attributes.end())
continue;
for (int i = c.offset; i < c.offset+c.width; i++) {
log_assert(it->second[i] == State::S0 || it->second[i] == State::Sx);
it->second.bits()[i] = State::Sx;
}
}
};
if (st.ffC) {
SigSpec C = cell->getPort(ID::C);
f(C, st.ffC, ID(CEC), ID(RSTC));
pm.add_siguser(C, cell);
cell->setPort(ID::C, C);
cell->setParam(ID(CREG), 1);
}
log(" clock: %s (%s)", log_signal(st.clock), "posedge");
if (st.ffC)
log(" ffC:%s", log_id(st.ffC));
log("\n");
}
pm.blacklist(cell);
}
struct XilinxDspPass : public Pass {
XilinxDspPass() : Pass("xilinx_dsp", "Xilinx: pack resources into DSPs") { }
void help() override
{
// |---v---|---v---|---v---|---v---|---v---|---v---|---v---|---v---|---v---|---v---|
log("\n");
log(" xilinx_dsp [options] [selection]\n");
log("\n");
log("Pack input registers (A2, A1, B2, B1, C, D, AD; with optional enable/reset),\n");
log("pipeline registers (M; with optional enable/reset), output registers (P; with\n");
log("optional enable/reset), pre-adder and/or post-adder into Xilinx DSP resources.\n");
log("\n");
log("Multiply-accumulate operations using the post-adder with feedback on the 'C'\n");
log("input will be folded into the DSP. In this scenario only, the 'C' input can be\n");
log("used to override the current accumulation result with a new value, which will\n");
log("be added to the multiplier result to form the next accumulation result.\n");
log("\n");
log("Use of the dedicated 'PCOUT' -> 'PCIN' cascade path is detected for 'P' -> 'C'\n");
log("connections (optionally, where 'P' is right-shifted by 17-bits and used as an\n");
log("input to the post-adder -- a pattern common for summing partial products to\n");
log("implement wide multipliers). Limited support also exists for similar cascading\n");
log("for A and B using '[AB]COUT' -> '[AB]CIN'. Currently, cascade chains are limited\n");
log("to a maximum length of 20 cells, corresponding to the smallest Xilinx 7 Series\n");
log("device.\n");
log("\n");
log("This pass is a no-op if the scratchpad variable 'xilinx_dsp.multonly' is set\n");
log("to 1.\n");
log("\n");
log("\n");
log("Experimental feature: addition/subtractions less than 12 or 24 bits with the\n");
log("'(* use_dsp=\"simd\" *)' attribute attached to the output wire or attached to\n");
log("the add/subtract operator will cause those operations to be implemented using\n");
log("the 'SIMD' feature of DSPs.\n");
log("\n");
log("Experimental feature: the presence of a `$ge' cell attached to the registered\n");
log("P output implementing the operation \"(P >= <power-of-2>)\" will be transformed\n");
log("into using the DSP48E1's pattern detector feature for overflow detection.\n");
log("\n");
log(" -family {xcup|xcu|xc7|xc6v|xc5v|xc4v|xc6s|xc3sda}\n");
log(" select the family to target\n");
log(" default: xc7\n");
log("\n");
}
void execute(std::vector<std::string> args, RTLIL::Design *design) override
{
log_header(design, "Executing XILINX_DSP pass (pack resources into DSPs).\n");
std::string family = "xc7";
size_t argidx;
for (argidx = 1; argidx < args.size(); argidx++)
{
if ((args[argidx] == "-family" || args[argidx] == "-arch") && argidx+1 < args.size()) {
family = args[++argidx];
continue;
}
break;
}
extra_args(args, argidx, design);
// Don't bother distinguishing between those.
if (family == "xc6v")
family = "xc7";
if (family == "xcup")
family = "xcu";
for (auto module : design->selected_modules()) {
if (design->scratchpad_get_bool("xilinx_dsp.multonly"))
continue;
// Experimental feature: pack $add/$sub cells with
// (* use_dsp48="simd" *) into DSP48E1's using its
// SIMD feature
if (family == "xc7")
xilinx_simd_pack(module, module->selected_cells());
// Match for all features ([ABDMP][12]?REG, pre-adder,
// post-adder, pattern detector, etc.) except for CREG
if (family == "xc7") {
xilinx_dsp_pm pm(module, module->selected_cells());
pm.run_xilinx_dsp_pack(xilinx_dsp_pack);
} else if (family == "xc6s" || family == "xc3sda") {
xilinx_dsp48a_pm pm(module, module->selected_cells());
pm.run_xilinx_dsp48a_pack(xilinx_dsp48a_pack);
}
// Separating out CREG packing is necessary since there
// is no guarantee that the cell ordering corresponds
// to the "expected" case (i.e. the order in which
// they appear in the source) thus the possiblity
// existed that a register got packed as a CREG into a
// downstream DSP that should have otherwise been a
// PREG of an upstream DSP that had not been visited
// yet
{
xilinx_dsp_CREG_pm pm(module, module->selected_cells());
pm.run_xilinx_dsp_packC(xilinx_dsp_packC);
}
// Lastly, identify and utilise PCOUT -> PCIN,
// ACOUT -> ACIN, and BCOUT-> BCIN dedicated cascade
// chains
{
xilinx_dsp_cascade_pm pm(module, module->selected_cells());
pm.run_xilinx_dsp_cascade();
}
}
}
} XilinxDspPass;
PRIVATE_NAMESPACE_END

View file

@ -0,0 +1,490 @@
// This file describes the main pattern matcher setup (of three total) that
// forms the `xilinx_dsp` pass described in xilinx_dsp.cc
// At a high level, it works as follows:
// ( 1) Starting from a DSP48E1 cell
// ( 2) Match the driver of the 'A' input to a possible $sdffe cell (ADREG)
// If ADREG matched, treat 'A' input as input of ADREG
// ( 3) Match the driver of the 'A' and 'D' inputs for a possible $add cell
// (pre-adder)
// ( 4) If pre-adder was present, find match 'A' input for A2REG
// If pre-adder was not present, move ADREG to A2REG
// If A2REG, then match 'A' input for A1REG
// ( 5) Match 'B' input for B2REG
// If B2REG, then match 'B' input for B1REG
// ( 6) Match 'D' input for DREG
// ( 7) Match 'P' output that exclusively drives an MREG
// ( 8) Match 'P' output that exclusively drives one of two inputs to an $add
// cell (post-adder).
// The other input to the adder is assumed to come in from the 'C' input
// (note: 'P' -> 'C' connections that exist for accumulators are
// recognised in xilinx_dsp.cc).
// ( 9) Match 'P' output that exclusively drives a PREG
// (10) If post-adder and PREG both present, match for a $mux cell driving
// the 'C' input, where one of the $mux's inputs is the PREG output.
// This indicates an accumulator situation, and one where a $mux exists
// to override the accumulated value:
// +--------------------------------+
// | ____ |
// +--| \ |
// |$mux|-+ |
// 'C' ---|____/ | |
// | /-------\ +----+ |
// +----+ +-| post- |___|PREG|---+ 'P'
// |MREG|------ | adder | +----+
// +----+ \-------/
// (11) If PREG present, match for a greater-than-or-equal $ge cell attached
// to the 'P' output where it is compared to a constant that is a
// power-of-2: e.g. `assign overflow = (PREG >= 2**40);`
// In this scenario, the pattern detector functionality of a DSP48E1 can
// to implement this function
// Notes:
// - The intention of this pattern matcher is for it to be compatible with
// DSP48E1 cells inferred from multiply operations by Yosys, as well as for
// user instantiations that may already contain the cells being packed...
// (though the latter is currently untested)
// - Since the $sdffe pattern is used
// for each *REG match, it has been factored out into two subpatterns:
// in_dffe and out_dffe located at the bottom of this file.
// - Matching for pattern detector features is currently incomplete. For
// example, matching for underflow as well as overflow detection is
// possible, as would auto-reset, enabling saturated arithmetic, detecting
// custom patterns, etc.
pattern xilinx_dsp_pack
state <SigBit> clock
state <SigSpec> sigA sigB sigC sigD sigM sigP
state <IdString> postAddAB postAddMuxAB
state <Cell*> ffAD ffA1 ffA2
state <Cell*> ffB1 ffB2
state <Cell*> ffD ffM ffP
// Variables used for subpatterns
state <SigSpec> argQ argD
udata <SigSpec> dffD dffQ
udata <SigBit> dffclock
udata <Cell*> dff
// (1) Starting from a DSP48E1 cell
match dsp
select dsp->type.in(\DSP48E1)
endmatch
code sigA sigB sigC sigD sigM clock
auto unextend = [](const SigSpec &sig) {
int i;
for (i = GetSize(sig)-1; i > 0; i--)
if (sig[i] != sig[i-1])
break;
// Do not remove non-const sign bit
if (sig[i].wire)
++i;
return sig.extract(0, i);
};
sigA = unextend(port(dsp, \A));
sigB = unextend(port(dsp, \B));
sigC = port(dsp, \C, SigSpec());
sigD = port(dsp, \D, SigSpec());
SigSpec P = port(dsp, \P);
if (param(dsp, \USE_MULT).decode_string() == "MULTIPLY") {
// Only care about those bits that are used
int i;
for (i = GetSize(P)-1; i >= 0; i--)
if (nusers(P[i]) > 1)
break;
i++;
log_assert(nusers(P.extract_end(i)) <= 1);
// This sigM could have no users if downstream sinks (e.g. $add) is
// narrower than $mul result, for example
if (i == 0)
reject;
sigM = P.extract(0, i);
}
else
sigM = P;
clock = port(dsp, \CLK, SigBit());
endcode
// (2) Match the driver of the 'A' input to a possible $sdffe cell (ADREG)
// If matched, treat 'A' input as input of ADREG
code argQ ffAD sigA clock
if (param(dsp, \ADREG).as_int() == 0) {
argQ = sigA;
subpattern(in_dffe);
if (dff) {
ffAD = dff;
clock = dffclock;
sigA = dffD;
}
}
endcode
// (3) Match the driver of the 'A' and 'D' inputs for a possible $add cell
// (pre-adder)
match preAdd
if sigD.empty() || sigD.is_fully_zero()
// Ensure that preAdder not already used
if param(dsp, \USE_DPORT).decode_string() == "FALSE"
if port(dsp, \INMODE, Const(0, 5)).is_fully_zero()
select preAdd->type.in($add)
// Output has to be 25 bits or less
select GetSize(port(preAdd, \Y)) <= 25
select nusers(port(preAdd, \Y)) == 2
choice <IdString> AB {\A, \B}
// A port has to be 30 bits or less
select GetSize(port(preAdd, AB)) <= 30
define <IdString> BA (AB == \A ? \B : \A)
// D port has to be 25 bits or less
select GetSize(port(preAdd, BA)) <= 25
index <SigSpec> port(preAdd, \Y) === sigA
optional
endmatch
code sigA sigD
if (preAdd) {
sigA = port(preAdd, \A);
sigD = port(preAdd, \B);
}
endcode
// (4) If pre-adder was present, find match 'A' input for A2REG
// If pre-adder was not present, move ADREG to A2REG
// Then match 'A' input for A1REG
code argQ ffAD sigA clock ffA2 ffA1
// Only search for ffA2 if there was a pre-adder
// (otherwise ffA2 would have been matched as ffAD)
if (preAdd) {
if (param(dsp, \AREG).as_int() == 0) {
argQ = sigA;
subpattern(in_dffe);
if (dff) {
ffA2 = dff;
clock = dffclock;
sigA = dffD;
}
}
}
// And if there wasn't a pre-adder,
// move AD register to A
else if (ffAD) {
log_assert(!ffA2);
std::swap(ffA2, ffAD);
}
// Now attempt to match A1
if (ffA2) {
argQ = sigA;
subpattern(in_dffe);
if (dff) {
if (dff->type != ffA2->type)
goto ffA1_end;
if (dff->type.in($sdff, $sdffe, $sdffce)) {
if (param(dff, \SRST_POLARITY) != param(ffA2, \SRST_POLARITY))
goto ffA1_end;
if (port(dff, \SRST) != port(ffA2, \SRST))
goto ffA1_end;
}
if (dff->type.in($dffe, $sdffe, $sdffce)) {
if (param(dff, \EN_POLARITY) != param(ffA2, \EN_POLARITY))
goto ffA1_end;
if (port(dff, \EN) != port(ffA2, \EN))
goto ffA1_end;
}
ffA1 = dff;
clock = dffclock;
sigA = dffD;
ffA1_end: ;
}
}
endcode
// (5) Match 'B' input for B2REG
// If B2REG, then match 'B' input for B1REG
code argQ ffB2 sigB clock ffB1
if (param(dsp, \BREG).as_int() == 0) {
argQ = sigB;
subpattern(in_dffe);
if (dff) {
ffB2 = dff;
clock = dffclock;
sigB = dffD;
// Now attempt to match B1
if (ffB2) {
argQ = sigB;
subpattern(in_dffe);
if (dff) {
if (dff->type != ffB2->type)
goto ffB1_end;
if (dff->type.in($sdff, $sdffe, $sdffce)) {
if (param(dff, \SRST_POLARITY) != param(ffB2, \SRST_POLARITY))
goto ffB1_end;
if (port(dff, \SRST) != port(ffB2, \SRST))
goto ffB1_end;
}
if (dff->type.in($dffe, $sdffe, $sdffce)) {
if (param(dff, \EN_POLARITY) != param(ffB2, \EN_POLARITY))
goto ffB1_end;
if (port(dff, \EN) != port(ffB2, \EN))
goto ffB1_end;
}
ffB1 = dff;
clock = dffclock;
sigB = dffD;
ffB1_end: ;
}
}
}
}
endcode
// (6) Match 'D' input for DREG
code argQ ffD sigD clock
if (param(dsp, \DREG).as_int() == 0) {
argQ = sigD;
subpattern(in_dffe);
if (dff) {
ffD = dff;
clock = dffclock;
sigD = dffD;
}
}
endcode
// (7) Match 'P' output that exclusively drives an MREG
code argD ffM sigM sigP clock
if (param(dsp, \MREG).as_int() == 0 && nusers(sigM) == 2) {
argD = sigM;
subpattern(out_dffe);
if (dff) {
ffM = dff;
clock = dffclock;
sigM = dffQ;
}
}
sigP = sigM;
endcode
// (8) Match 'P' output that exclusively drives one of two inputs to an $add
// cell (post-adder).
// The other input to the adder is assumed to come in from the 'C' input
// (note: 'P' -> 'C' connections that exist for accumulators are
// recognised in xilinx_dsp.cc).
match postAdd
// Ensure that Z mux is not already used
if port(dsp, \OPMODE, SigSpec(0, 7)).extract(4,3).is_fully_zero()
select postAdd->type.in($add)
select GetSize(port(postAdd, \Y)) <= 48
choice <IdString> AB {\A, \B}
select nusers(port(postAdd, AB)) == 2
index <SigBit> port(postAdd, AB)[0] === sigP[0]
filter GetSize(port(postAdd, AB)) >= GetSize(sigP)
filter port(postAdd, AB).extract(0, GetSize(sigP)) == sigP
// Check that remainder of AB is a sign- or zero-extension
filter port(postAdd, AB).extract_end(GetSize(sigP)) == SigSpec(sigP[GetSize(sigP)-1], GetSize(port(postAdd, AB))-GetSize(sigP)) || port(postAdd, AB).extract_end(GetSize(sigP)) == SigSpec(State::S0, GetSize(port(postAdd, AB))-GetSize(sigP))
set postAddAB AB
optional
endmatch
code sigC sigP
if (postAdd) {
sigC = port(postAdd, postAddAB == \A ? \B : \A);
sigP = port(postAdd, \Y);
}
endcode
// (9) Match 'P' output that exclusively drives a PREG
code argD ffP sigP clock
if (param(dsp, \PREG).as_int() == 0) {
if (nusers(sigP) == 2) {
argD = sigP;
subpattern(out_dffe);
if (dff) {
ffP = dff;
clock = dffclock;
sigP = dffQ;
}
}
}
endcode
// (10) If post-adder and PREG both present, match for a $mux cell driving
// the 'C' input, where one of the $mux's inputs is the PREG output.
// This indicates an accumulator situation, and one where a $mux exists
// to override the accumulated value:
// +--------------------------------+
// | ____ |
// +--| \ |
// |$mux|-+ |
// 'C' ---|____/ | |
// | /-------\ +----+ |
// +----+ +-| post- |___|PREG|---+ 'P'
// |MREG|------ | adder | +----+
// +----+ \-------/
match postAddMux
if postAdd
if ffP
select postAddMux->type.in($mux)
select nusers(port(postAddMux, \Y)) == 2
choice <IdString> AB {\A, \B}
index <SigSpec> port(postAddMux, AB) === sigP
index <SigSpec> port(postAddMux, \Y) === sigC
set postAddMuxAB AB
optional
endmatch
code sigC
if (postAddMux)
sigC = port(postAddMux, postAddMuxAB == \A ? \B : \A);
endcode
// (11) If PREG present, match for a greater-than-or-equal $ge cell attached to
// the 'P' output where it is compared to a constant that is a power-of-2:
// e.g. `assign overflow = (PREG >= 2**40);`
// In this scenario, the pattern detector functionality of a DSP48E1 can
// to implement this function
match overflow
if ffP
if param(dsp, \USE_PATTERN_DETECT).decode_string() == "NO_PATDET"
select overflow->type.in($ge)
select GetSize(port(overflow, \Y)) <= 48
select port(overflow, \B).is_fully_const()
define <Const> B port(overflow, \B).as_const()
select std::count(B.begin(), B.end(), State::S1) == 1
index <SigSpec> port(overflow, \A) === sigP
optional
endmatch
code
accept;
endcode
// #######################
// Subpattern for matching against input registers, based on knowledge of the
// 'Q' input.
subpattern in_dffe
arg argQ clock
code
dff = nullptr;
if (argQ.empty())
reject;
for (const auto &c : argQ.chunks()) {
// Abandon matches when 'Q' is a constant
if (!c.wire)
reject;
// Abandon matches when 'Q' has the keep attribute set
if (c.wire->get_bool_attribute(\keep))
reject;
// Abandon matches when 'Q' has a non-zero init attribute set
// (not supported by DSP48E1)
Const init = c.wire->attributes.at(\init, Const());
if (!init.empty())
for (auto b : init.extract(c.offset, c.width))
if (b != State::Sx && b != State::S0)
reject;
}
endcode
match ff
select ff->type.in($dff, $dffe, $sdff, $sdffe)
// DSP48E1 does not support clock inversion
select param(ff, \CLK_POLARITY).as_bool()
// Check that reset value, if present, is fully 0.
filter ff->type.in($dff, $dffe) || param(ff, \SRST_VALUE).is_fully_zero()
slice offset GetSize(port(ff, \D))
index <SigBit> port(ff, \Q)[offset] === argQ[0]
// Check that the rest of argQ is present
filter GetSize(port(ff, \Q)) >= offset + GetSize(argQ)
filter port(ff, \Q).extract(offset, GetSize(argQ)) == argQ
filter clock == SigBit() || port(ff, \CLK)[0] == clock
endmatch
code argQ
SigSpec Q = port(ff, \Q);
dff = ff;
dffclock = port(ff, \CLK);
dffD = argQ;
SigSpec D = port(ff, \D);
argQ = Q;
dffD.replace(argQ, D);
endcode
// #######################
// Subpattern for matching against output registers, based on knowledge of the
// 'D' input.
// At a high level:
// (1) Starting from an optional $mux cell that implements clock enable
// semantics --- one where the given 'D' argument (partially or fully)
// drives one of its two inputs
// (2) Starting from, or continuing onto, another optional $mux cell that
// implements synchronous reset semantics --- one where the given 'D'
// argument (or the clock enable $mux output) drives one of its two inputs
// and where the other input is fully zero
// (3) Match for a $dff cell (whose 'D' input is the 'D' argument, or the
// output of the previous clock enable or reset $mux cells)
subpattern out_dffe
arg argD argQ clock
code
dff = nullptr;
for (auto c : argD.chunks())
// Abandon matches when 'D' has the keep attribute set
if (c.wire->get_bool_attribute(\keep))
reject;
endcode
match ff
select ff->type.in($dff, $dffe, $sdff, $sdffe)
// DSP48E1 does not support clock inversion
select param(ff, \CLK_POLARITY).as_bool()
slice offset GetSize(port(ff, \D))
index <SigBit> port(ff, \D)[offset] === argD[0]
// Check that the rest of argD is present
filter GetSize(port(ff, \D)) >= offset + GetSize(argD)
filter port(ff, \D).extract(offset, GetSize(argD)) == argD
filter clock == SigBit() || port(ff, \CLK)[0] == clock
endmatch
code argQ
SigSpec D = port(ff, \D);
SigSpec Q = port(ff, \Q);
argQ = argD;
argQ.replace(D, Q);
// Abandon matches when 'Q' has a non-zero init attribute set
// (not supported by DSP48E1)
for (auto c : argQ.chunks()) {
Const init = c.wire->attributes.at(\init, Const());
if (!init.empty())
for (auto b : init.extract(c.offset, c.width))
if (b != State::Sx && b != State::S0)
reject;
}
dff = ff;
dffQ = argQ;
dffclock = port(ff, \CLK);
endcode

View file

@ -0,0 +1,429 @@
// This file describes the main pattern matcher setup (of three total) that
// forms the `xilinx_dsp` pass described in xilinx_dsp.cc - version for
// DSP48A/DSP48A1 (Spartan 3A DSP, Spartan 6).
// At a high level, it works as follows:
// ( 1) Starting from a DSP48A/DSP48A1 cell
// ( 2) Match the driver of the 'B' input to a possible $dff cell (B1REG)
// If B1REG matched, treat 'B' input as input of B1REG
// ( 3) Match the driver of the 'B' and 'D' inputs for a possible $add cell
// (pre-adder)
// ( 4) Match 'B' input for B0REG
// ( 5) Match 'A' input for A1REG
// If A1REG, then match 'A' input for A0REG
// ( 6) Match 'D' input for DREG
// ( 7) Match 'P' output that exclusively drives an MREG
// ( 8) Match 'P' output that exclusively drives one of two inputs to an $add
// cell (post-adder).
// The other input to the adder is assumed to come in from the 'C' input
// (note: 'P' -> 'C' connections that exist for accumulators are
// recognised in xilinx_dsp.cc).
// ( 9) Match 'P' output that exclusively drives a PREG
// (10) If post-adder and PREG both present, match for a $mux cell driving
// the 'C' input, where one of the $mux's inputs is the PREG output.
// This indicates an accumulator situation, and one where a $mux exists
// to override the accumulated value:
// +--------------------------------+
// | ____ |
// +--| \ |
// |$mux|-+ |
// 'C' ---|____/ | |
// | /-------\ +----+ |
// +----+ +-| post- |___|PREG|---+ 'P'
// |MREG|------ | adder | +----+
// +----+ \-------/
// Notes: see the notes in xilinx_dsp.pmg
pattern xilinx_dsp48a_pack
state <SigBit> clock
state <SigSpec> sigA sigB sigC sigD sigM sigP
state <IdString> postAddAB postAddMuxAB
state <Cell*> ffA0 ffA1
state <Cell*> ffB0 ffB1
state <Cell*> ffD ffM ffP
// Variables used for subpatterns
state <SigSpec> argQ argD
udata <SigSpec> dffD dffQ
udata <SigBit> dffclock
udata <Cell*> dff
// (1) Starting from a DSP48A/DSP48A1 cell
match dsp
select dsp->type.in(\DSP48A, \DSP48A1)
endmatch
code sigA sigB sigC sigD sigM clock
auto unextend = [](const SigSpec &sig) {
int i;
for (i = GetSize(sig)-1; i > 0; i--)
if (sig[i] != sig[i-1])
break;
// Do not remove non-const sign bit
if (sig[i].wire)
++i;
return sig.extract(0, i);
};
sigA = unextend(port(dsp, \A));
sigB = unextend(port(dsp, \B));
sigC = port(dsp, \C, SigSpec());
sigD = port(dsp, \D, SigSpec());
SigSpec P = port(dsp, \P);
// Only care about those bits that are used
int i;
for (i = GetSize(P)-1; i >= 0; i--)
if (nusers(P[i]) > 1)
break;
i++;
log_assert(nusers(P.extract_end(i)) <= 1);
// This sigM could have no users if downstream sinks (e.g. $add) is
// narrower than $mul result, for example
if (i == 0)
reject;
sigM = P.extract(0, i);
clock = port(dsp, \CLK, SigBit());
endcode
// (2) Match the driver of the 'B' input to a possible $dff cell (B1REG)
// (attached to at most two $mux cells that implement clock-enable or
// reset functionality, using a subpattern discussed above)
// If matched, treat 'B' input as input of B1REG
code argQ ffB1 sigB clock
if (param(dsp, \B1REG).as_int() == 0 && param(dsp, \B0REG).as_int() == 0 && port(dsp, \OPMODE, SigSpec()).extract(4, 1).is_fully_zero()) {
argQ = sigB;
subpattern(in_dffe);
if (dff) {
ffB1 = dff;
clock = dffclock;
sigB = dffD;
}
}
endcode
// (3) Match the driver of the 'B' and 'D' inputs for a possible $add cell
// (pre-adder)
match preAdd
if sigD.empty() || sigD.is_fully_zero()
if param(dsp, \B0REG).as_int() == 0
// Ensure that preAdder not already used
if port(dsp, \OPMODE, SigSpec()).extract(4, 1).is_fully_zero()
select preAdd->type.in($add, $sub)
// Output has to be 18 bits or less
select GetSize(port(preAdd, \Y)) <= 18
select nusers(port(preAdd, \Y)) == 2
// D port has to be 18 bits or less
select GetSize(port(preAdd, \A)) <= 18
// B port has to be 18 bits or less
select GetSize(port(preAdd, \B)) <= 18
index <SigSpec> port(preAdd, \Y) === sigB
optional
endmatch
code sigB sigD
if (preAdd) {
sigD = port(preAdd, \A);
sigB = port(preAdd, \B);
}
endcode
// (4) Match 'B' input for B0REG
code argQ ffB0 sigB clock
if (param(dsp, \B0REG).as_int() == 0) {
argQ = sigB;
subpattern(in_dffe);
if (dff) {
if (ffB1) {
if (dff->type != ffB1->type)
goto ffB0_end;
if (dff->type.in($sdff, $sdffe, $sdffce)) {
if (param(dff, \SRST_POLARITY) != param(ffB1, \SRST_POLARITY))
goto ffB0_end;
if (port(dff, \SRST) != port(ffB1, \SRST))
goto ffB0_end;
}
if (dff->type.in($dffe, $sdffe, $sdffce)) {
if (param(dff, \EN_POLARITY) != param(ffB1, \EN_POLARITY))
goto ffB0_end;
if (port(dff, \EN) != port(ffB1, \EN))
goto ffB0_end;
}
}
ffB0 = dff;
clock = dffclock;
sigB = dffD;
}
}
ffB0_end:
endcode
// (5) Match 'A' input for A1REG
// If A1REG, then match 'A' input for A0REG
code argQ ffA1 sigA clock ffA0
if (param(dsp, \A0REG).as_int() == 0 && param(dsp, \A1REG).as_int() == 0) {
argQ = sigA;
subpattern(in_dffe);
if (dff) {
ffA1 = dff;
clock = dffclock;
sigA = dffD;
// Now attempt to match A0
if (ffA1) {
argQ = sigA;
subpattern(in_dffe);
if (dff) {
if (dff->type != ffA1->type)
goto ffA0_end;
if (dff->type.in($sdff, $sdffe, $sdffce)) {
if (param(dff, \SRST_POLARITY) != param(ffA1, \SRST_POLARITY))
goto ffA0_end;
if (port(dff, \SRST) != port(ffA1, \SRST))
goto ffA0_end;
}
if (dff->type.in($dffe, $sdffe, $sdffce)) {
if (param(dff, \EN_POLARITY) != param(ffA1, \EN_POLARITY))
goto ffA0_end;
if (port(dff, \EN) != port(ffA1, \EN))
goto ffA0_end;
}
ffA0 = dff;
clock = dffclock;
sigA = dffD;
ffA0_end: ;
}
}
}
}
endcode
// (6) Match 'D' input for DREG
code argQ ffD sigD clock
if (param(dsp, \DREG).as_int() == 0) {
argQ = sigD;
subpattern(in_dffe);
if (dff) {
ffD = dff;
clock = dffclock;
sigD = dffD;
}
}
endcode
// (7) Match 'P' output that exclusively drives an MREG
code argD ffM sigM sigP clock
if (param(dsp, \MREG).as_int() == 0 && nusers(sigM) == 2) {
argD = sigM;
subpattern(out_dffe);
if (dff) {
ffM = dff;
clock = dffclock;
sigM = dffQ;
}
}
sigP = sigM;
endcode
// (8) Match 'P' output that exclusively drives one of two inputs to an $add
// cell (post-adder).
// The other input to the adder is assumed to come in from the 'C' input
// (note: 'P' -> 'C' connections that exist for accumulators are
// recognised in xilinx_dsp.cc).
match postAdd
// Ensure that Z mux is not already used
if port(dsp, \OPMODE, SigSpec()).extract(2,2).is_fully_zero()
select postAdd->type.in($add)
select GetSize(port(postAdd, \Y)) <= 48
choice <IdString> AB {\A, \B}
select nusers(port(postAdd, AB)) == 2
index <SigBit> port(postAdd, AB)[0] === sigP[0]
filter GetSize(port(postAdd, AB)) >= GetSize(sigP)
filter port(postAdd, AB).extract(0, GetSize(sigP)) == sigP
// Check that remainder of AB is a sign- or zero-extension
filter port(postAdd, AB).extract_end(GetSize(sigP)) == SigSpec(sigP[GetSize(sigP)-1], GetSize(port(postAdd, AB))-GetSize(sigP)) || port(postAdd, AB).extract_end(GetSize(sigP)) == SigSpec(State::S0, GetSize(port(postAdd, AB))-GetSize(sigP))
set postAddAB AB
optional
endmatch
code sigC sigP
if (postAdd) {
sigC = port(postAdd, postAddAB == \A ? \B : \A);
sigP = port(postAdd, \Y);
}
endcode
// (9) Match 'P' output that exclusively drives a PREG
code argD ffP sigP clock
if (param(dsp, \PREG).as_int() == 0) {
if (nusers(sigP) == 2) {
argD = sigP;
subpattern(out_dffe);
if (dff) {
ffP = dff;
clock = dffclock;
sigP = dffQ;
}
}
}
endcode
// (10) If post-adder and PREG both present, match for a $mux cell driving
// the 'C' input, where one of the $mux's inputs is the PREG output.
// This indicates an accumulator situation, and one where a $mux exists
// to override the accumulated value:
// +--------------------------------+
// | ____ |
// +--| \ |
// |$mux|-+ |
// 'C' ---|____/ | |
// | /-------\ +----+ |
// +----+ +-| post- |___|PREG|---+ 'P'
// |MREG|------ | adder | +----+
// +----+ \-------/
match postAddMux
if postAdd
if ffP
select postAddMux->type.in($mux)
select nusers(port(postAddMux, \Y)) == 2
choice <IdString> AB {\A, \B}
index <SigSpec> port(postAddMux, AB) === sigP
index <SigSpec> port(postAddMux, \Y) === sigC
set postAddMuxAB AB
optional
endmatch
code sigC
if (postAddMux)
sigC = port(postAddMux, postAddMuxAB == \A ? \B : \A);
endcode
code
accept;
endcode
// #######################
// Subpattern for matching against input registers, based on knowledge of the
// 'Q' input.
subpattern in_dffe
arg argQ clock
code
dff = nullptr;
if (argQ.empty())
reject;
for (const auto &c : argQ.chunks()) {
// Abandon matches when 'Q' is a constant
if (!c.wire)
reject;
// Abandon matches when 'Q' has the keep attribute set
if (c.wire->get_bool_attribute(\keep))
reject;
// Abandon matches when 'Q' has a non-zero init attribute set
// (not supported by DSP48E1)
Const init = c.wire->attributes.at(\init, Const());
if (!init.empty())
for (auto b : init.extract(c.offset, c.width))
if (b != State::Sx && b != State::S0)
reject;
}
endcode
match ff
select ff->type.in($dff, $dffe, $sdff, $sdffe)
// DSP48E1 does not support clock inversion
select param(ff, \CLK_POLARITY).as_bool()
// Check that reset value, if present, is fully 0.
filter ff->type.in($dff, $dffe) || param(ff, \SRST_VALUE).is_fully_zero()
slice offset GetSize(port(ff, \D))
index <SigBit> port(ff, \Q)[offset] === argQ[0]
// Check that the rest of argQ is present
filter GetSize(port(ff, \Q)) >= offset + GetSize(argQ)
filter port(ff, \Q).extract(offset, GetSize(argQ)) == argQ
filter clock == SigBit() || port(ff, \CLK)[0] == clock
endmatch
code argQ
SigSpec Q = port(ff, \Q);
dff = ff;
dffclock = port(ff, \CLK);
dffD = argQ;
SigSpec D = port(ff, \D);
argQ = Q;
dffD.replace(argQ, D);
endcode
// #######################
// Subpattern for matching against output registers, based on knowledge of the
// 'D' input.
// At a high level:
// (1) Starting from an optional $mux cell that implements clock enable
// semantics --- one where the given 'D' argument (partially or fully)
// drives one of its two inputs
// (2) Starting from, or continuing onto, another optional $mux cell that
// implements synchronous reset semantics --- one where the given 'D'
// argument (or the clock enable $mux output) drives one of its two inputs
// and where the other input is fully zero
// (3) Match for a $dff cell (whose 'D' input is the 'D' argument, or the
// output of the previous clock enable or reset $mux cells)
subpattern out_dffe
arg argD argQ clock
code
dff = nullptr;
for (auto c : argD.chunks())
// Abandon matches when 'D' has the keep attribute set
if (c.wire->get_bool_attribute(\keep))
reject;
endcode
match ff
select ff->type.in($dff, $dffe, $sdff, $sdffe)
// DSP48E1 does not support clock inversion
select param(ff, \CLK_POLARITY).as_bool()
slice offset GetSize(port(ff, \D))
index <SigBit> port(ff, \D)[offset] === argD[0]
// Check that the rest of argD is present
filter GetSize(port(ff, \D)) >= offset + GetSize(argD)
filter port(ff, \D).extract(offset, GetSize(argD)) == argD
filter clock == SigBit() || port(ff, \CLK)[0] == clock
endmatch
code argQ
SigSpec D = port(ff, \D);
SigSpec Q = port(ff, \Q);
argQ = argD;
argQ.replace(D, Q);
// Abandon matches when 'Q' has a non-zero init attribute set
// (not supported by DSP48E1)
for (auto c : argQ.chunks()) {
Const init = c.wire->attributes.at(\init, Const());
if (!init.empty())
for (auto b : init.extract(c.offset, c.width))
if (b != State::Sx && b != State::S0)
reject;
}
dff = ff;
dffQ = argQ;
dffclock = port(ff, \CLK);
endcode

View file

@ -0,0 +1,149 @@
// This file describes the second of three pattern matcher setups that
// forms the `xilinx_dsp` pass described in xilinx_dsp.cc
// At a high level, it works as follows:
// (1) Starting from a DSP48* cell that (a) doesn't have a CREG already,
// and (b) uses the 'C' port
// (2) Match the driver of the 'C' input to a possible $dff cell (CREG)
// (attached to at most two $mux cells that implement clock-enable or
// reset functionality, using a subpattern discussed below)
// Notes:
// - Running CREG packing after xilinx_dsp_pack is necessary since there is no
// guarantee that the cell ordering corresponds to the "expected" case (i.e.
// the order in which they appear in the source) thus the possiblity existed
// that a register got packed as a CREG into a downstream DSP that should
// have otherwise been a PREG of an upstream DSP that had not been visited
// yet
// - The reason this is separated out from the xilinx_dsp.pmg file is
// for efficiency --- each *.pmg file creates a class of the same basename,
// which when constructed, creates a custom database tailored to the
// pattern(s) contained within. Since the pattern in this file must be
// executed after the pattern contained in xilinx_dsp.pmg, it is necessary
// to reconstruct this database. Separating the two patterns into
// independent files causes two smaller, more specific, databases.
pattern xilinx_dsp_packC
udata <std::function<SigSpec(const SigSpec&)>> unextend
state <SigBit> clock
state <SigSpec> sigC sigP
state <Cell*> ffC
// Variables used for subpatterns
state <SigSpec> argQ argD
state <int> ffoffset
udata <SigSpec> dffD dffQ
udata <SigBit> dffclock
udata <Cell*> dff
// (1) Starting from a DSP48* cell that (a) doesn't have a CREG already,
// and (b) uses the 'C' port
match dsp
select dsp->type.in(\DSP48A, \DSP48A1, \DSP48E1)
select param(dsp, \CREG).as_int() == 0
select nusers(port(dsp, \C, SigSpec())) > 1
endmatch
code sigC sigP clock
unextend = [](const SigSpec &sig) {
int i;
for (i = GetSize(sig)-1; i > 0; i--)
if (sig[i] != sig[i-1])
break;
// Do not remove non-const sign bit
if (sig[i].wire)
++i;
return sig.extract(0, i);
};
sigC = unextend(port(dsp, \C, SigSpec()));
SigSpec P = port(dsp, \P);
if (!dsp->type.in(\DSP48E1) ||
param(dsp, \USE_MULT).decode_string() == "MULTIPLY") {
// Only care about those bits that are used
int i;
for (i = GetSize(P)-1; i >= 0; i--)
if (nusers(P[i]) > 1)
break;
i++;
log_assert(nusers(P.extract_end(i)) <= 1);
sigP = P.extract(0, i);
}
else
sigP = P;
clock = port(dsp, \CLK, SigBit());
endcode
// (2) Match the driver of the 'C' input to a possible $dff cell (CREG)
// (attached to at most two $mux cells that implement clock-enable or
// reset functionality, using the in_dffe subpattern)
code argQ ffC sigC clock
argQ = sigC;
subpattern(in_dffe);
if (dff) {
ffC = dff;
clock = dffclock;
sigC = dffD;
}
endcode
code
if (ffC)
accept;
endcode
// #######################
// Subpattern for matching against input registers, based on knowledge of the
// 'Q' input.
subpattern in_dffe
arg argQ clock
code
dff = nullptr;
if (argQ.empty())
reject;
for (const auto &c : argQ.chunks()) {
// Abandon matches when 'Q' is a constant
if (!c.wire)
reject;
// Abandon matches when 'Q' has the keep attribute set
if (c.wire->get_bool_attribute(\keep))
reject;
// Abandon matches when 'Q' has a non-zero init attribute set
// (not supported by DSP48E1)
Const init = c.wire->attributes.at(\init, Const());
if (!init.empty())
for (auto b : init.extract(c.offset, c.width))
if (b != State::Sx && b != State::S0)
reject;
}
endcode
match ff
select ff->type.in($dff, $dffe, $sdff, $sdffe)
// DSP48E1 does not support clock inversion
select param(ff, \CLK_POLARITY).as_bool()
// Check that reset value, if present, is fully 0.
filter ff->type.in($dff, $dffe) || param(ff, \SRST_VALUE).is_fully_zero()
slice offset GetSize(port(ff, \D))
index <SigBit> port(ff, \Q)[offset] === argQ[0]
// Check that the rest of argQ is present
filter GetSize(port(ff, \Q)) >= offset + GetSize(argQ)
filter port(ff, \Q).extract(offset, GetSize(argQ)) == argQ
filter clock == SigBit() || port(ff, \CLK)[0] == clock
endmatch
code argQ
SigSpec Q = port(ff, \Q);
dff = ff;
dffclock = port(ff, \CLK);
dffD = argQ;
SigSpec D = port(ff, \D);
argQ = Q;
dffD.replace(argQ, D);
endcode

View file

@ -0,0 +1,409 @@
// This file describes the third of three pattern matcher setups that
// forms the `xilinx_dsp` pass described in xilinx_dsp.cc
// At a high level, it works as follows:
// (1) Starting from a DSP48E1 cell that (a) has the Z multiplexer
// (controlled by OPMODE[6:4]) set to zero and (b) doesn't already
// use the 'PCOUT' port
// (2.1) Match another DSP48E1 cell that (a) does not have the CREG enabled,
// (b) has its Z multiplexer output set to the 'C' port, which is
// driven by the 'P' output of the previous DSP cell, and (c) has its
// 'PCIN' port unused
// (2.2) Same as (2.1) but with the 'C' port driven by the 'P' output of the
// previous DSP cell right-shifted by 17 bits
// (3) For this subequent DSP48E1 match (i.e. PCOUT -> PCIN cascade exists)
// if (a) the previous DSP48E1 uses either the A2REG or A1REG, (b) this
// DSP48 does not use A2REG nor A1REG, (c) this DSP48E1 does not already
// have an ACOUT -> ACIN cascade, (d) the previous DSP does not already
// use its ACOUT port, then examine if an ACOUT -> ACIN cascade
// opportunity exists by matching for a $dff-with-optional-clock-enable-
// or-reset and checking that the 'D' input of this register is the same
// as the 'A' input of the previous DSP
// (4) Same as (3) but for BCOUT -> BCIN cascade
// (5) Recursively go to (2.1) until no more matches possible, keeping track
// of the longest possible chain found
// (6) The longest chain is then divided into chunks of no more than
// MAX_DSP_CASCADE in length (to prevent long cascades that exceed the
// height of a DSP column) with each DSP in each chunk being rewritten
// to use [ABP]COUT -> [ABP]CIN cascading as appropriate
// Notes:
// - Currently, [AB]COUT -> [AB]COUT cascades (3 or 4) are only considered
// if a PCOUT -> PCIN cascade is (2.1 or 2.2) first identified; this need
// not be the case --- [AB] cascades can exist independently of a P cascade
// (though all three cascades must come from the same DSP). This situation
// is not handled currently.
// - In addition, [AB]COUT -> [AB]COUT cascades (3 or 4) are currently
// conservative in that they examine the situation where (a) the previous
// DSP has [AB]2REG or [AB]1REG enabled, (b) that the downstream DSP has no
// registers enabled, and (c) that there exists only one additional register
// between the upstream and downstream DSPs. This can certainly be relaxed
// to identify situations ranging from (i) neither DSP uses any registers,
// to (ii) upstream DSP has 2 registers, downstream DSP has 2 registers, and
// there exists a further 2 registers between them. This remains a TODO
// item.
pattern xilinx_dsp_cascade
udata <std::function<SigSpec(const SigSpec&)>> unextend
udata <vector<std::tuple<Cell*,int,int,int>>> chain longest_chain
state <Cell*> next
state <SigBit> clock
state <int> AREG BREG
// Variables used for subpatterns
state <SigSpec> argQ argD
state <int> ffoffset
udata <SigSpec> dffD dffQ
udata <SigBit> dffclock
udata <Cell*> dff
code
#define MAX_DSP_CASCADE 20
endcode
// (1) Starting from a DSP48* cell that (a) has the Z multiplexer
// (controlled by OPMODE[3:2] for DSP48A*, by OPMODE[6:4] for DSP48E1)
// set to zero and (b) doesn't already use the 'PCOUT' port
match first
select (first->type.in(\DSP48A, \DSP48A1) && port(first, \OPMODE, Const(0, 8)).extract(2,2) == Const::from_string("00")) || (first->type.in(\DSP48E1) && port(first, \OPMODE, Const(0, 7)).extract(4,3) == Const::from_string("000"))
select nusers(port(first, \PCOUT, SigSpec())) <= 1
endmatch
// (6) The longest chain is then divided into chunks of no more than
// MAX_DSP_CASCADE in length (to prevent long cascades that exceed the
// height of a DSP column) with each DSP in each chunk being rewritten
// to use [ABP]COUT -> [ABP]CIN cascading as appropriate
code
longest_chain.clear();
chain.emplace_back(first, -1, -1, -1);
subpattern(tail);
finally
chain.pop_back();
log_assert(chain.empty());
if (GetSize(longest_chain) > 1) {
Cell *dsp = std::get<0>(longest_chain.front());
Cell *dsp_pcin;
int P, AREG, BREG;
for (int i = 1; i < GetSize(longest_chain); i++) {
std::tie(dsp_pcin,P,AREG,BREG) = longest_chain[i];
if (i % MAX_DSP_CASCADE > 0) {
if (P >= 0) {
Wire *cascade = module->addWire(NEW_ID, 48);
dsp_pcin->setPort(ID(C), Const(0, 48));
dsp_pcin->setPort(ID(PCIN), cascade);
dsp->setPort(ID(PCOUT), cascade);
add_siguser(cascade, dsp_pcin);
add_siguser(cascade, dsp);
SigSpec opmode = port(dsp_pcin, \OPMODE, Const(0, 7));
if (dsp->type.in(\DSP48A, \DSP48A1)) {
log_assert(P == 0);
opmode[3] = State::S0;
opmode[2] = State::S1;
}
else if (dsp->type.in(\DSP48E1)) {
if (P == 17)
opmode[6] = State::S1;
else if (P == 0)
opmode[6] = State::S0;
else log_abort();
opmode[5] = State::S0;
opmode[4] = State::S1;
}
dsp_pcin->setPort(\OPMODE, opmode);
log_debug("PCOUT -> PCIN cascade for %s -> %s\n", log_id(dsp), log_id(dsp_pcin));
}
if (AREG >= 0) {
Wire *cascade = module->addWire(NEW_ID, 30);
dsp_pcin->setPort(ID(A), Const(0, 30));
dsp_pcin->setPort(ID(ACIN), cascade);
dsp->setPort(ID(ACOUT), cascade);
add_siguser(cascade, dsp_pcin);
add_siguser(cascade, dsp);
if (dsp->type.in(\DSP48E1))
dsp->setParam(ID(ACASCREG), AREG);
dsp_pcin->setParam(ID(A_INPUT), Const("CASCADE"));
log_debug("ACOUT -> ACIN cascade for %s -> %s\n", log_id(dsp), log_id(dsp_pcin));
}
if (BREG >= 0) {
Wire *cascade = module->addWire(NEW_ID, 18);
if (dsp->type.in(\DSP48A, \DSP48A1)) {
// According to UG389 p9 [https://www.xilinx.com/support/documentation/user_guides/ug389.pdf]
// "The DSP48A1 component uses this input when cascading
// BCOUT from an adjacent DSP48A1 slice. The tools then
// translate BCOUT cascading to the dedicated BCIN input
// and set the B_INPUT attribute for implementation."
dsp_pcin->setPort(ID(B), cascade);
}
else {
dsp_pcin->setPort(ID(B), Const(0, 18));
dsp_pcin->setPort(ID(BCIN), cascade);
}
dsp->setPort(ID(BCOUT), cascade);
add_siguser(cascade, dsp_pcin);
add_siguser(cascade, dsp);
if (dsp->type.in(\DSP48E1)) {
dsp->setParam(ID(BCASCREG), BREG);
// According to UG389 p13 [https://www.xilinx.com/support/documentation/user_guides/ug389.pdf]
// "The attribute is only used by place and route tools and
// is not necessary for the users to set for synthesis. The
// attribute is determined by the connection to the B port
// of the DSP48A1 slice. If the B port is connected to the
// BCOUT of another DSP48A1 slice, then the tools automatically
// set the attribute to 'CASCADE', otherwise it is set to
// 'DIRECT'".
dsp_pcin->setParam(ID(B_INPUT), Const("CASCADE"));
}
log_debug("BCOUT -> BCIN cascade for %s -> %s\n", log_id(dsp), log_id(dsp_pcin));
}
}
else {
log_debug(" Blocking %s -> %s cascade (exceeds max: %d)\n", log_id(dsp), log_id(dsp_pcin), MAX_DSP_CASCADE);
}
dsp = dsp_pcin;
}
accept;
}
endcode
// ------------------------------------------------------------------
subpattern tail
arg first
arg next
// (2.1) Match another DSP48* cell that (a) does not have the CREG enabled,
// (b) has its Z multiplexer output set to the 'C' port, which is
// driven by the 'P' output of the previous DSP cell, and (c) has its
// 'PCIN' port unused
match nextP
select !nextP->type.in(\DSP48E1) || !param(nextP, \CREG).as_bool()
select (nextP->type.in(\DSP48A, \DSP48A1) && port(nextP, \OPMODE, Const(0, 8)).extract(2,2) == Const::from_string("11")) || (nextP->type.in(\DSP48E1) && port(nextP, \OPMODE, Const(0, 7)).extract(4,3) == Const::from_string("011"))
select nusers(port(nextP, \C, SigSpec())) > 1
select nusers(port(nextP, \PCIN, SigSpec())) == 0
index <SigBit> port(nextP, \C)[0] === port(std::get<0>(chain.back()), \P)[0]
semioptional
endmatch
// (2.2) For DSP48E1 only, same as (2.1) but with the 'C' port driven
// by the 'P' output of the previous DSP cell right-shifted by 17 bits
match nextP_shift17
if !nextP
select nextP_shift17->type.in(\DSP48E1)
select !param(nextP_shift17, \CREG).as_bool()
select port(nextP_shift17, \OPMODE, Const(0, 7)).extract(4,3) == Const::from_string("011")
select nusers(port(nextP_shift17, \C, SigSpec())) > 1
select nusers(port(nextP_shift17, \PCIN, SigSpec())) == 0
index <SigBit> port(nextP_shift17, \C)[0] === port(std::get<0>(chain.back()), \P)[17]
semioptional
endmatch
code next
next = nextP;
if (!nextP)
next = nextP_shift17;
if (next) {
if (next->type != first->type)
reject;
unextend = [](const SigSpec &sig) {
int i;
for (i = GetSize(sig)-1; i > 0; i--)
if (sig[i] != sig[i-1])
break;
// Do not remove non-const sign bit
if (sig[i].wire)
++i;
return sig.extract(0, i);
};
}
endcode
// (3) For this subequent DSP48E1 match (i.e. PCOUT -> PCIN cascade exists)
// if (a) this DSP48E1 does not already have an ACOUT -> ACIN cascade,
// (b) the previous DSP does not already use its ACOUT port, then
// examine if an ACOUT -> ACIN cascade opportunity exists if
// (i) A ports are identical, or (ii) separated by a
// $dff-with-optional-clock-enable-or-reset and checking that the 'D' input
// of this register is the same as the 'A' input of the previous DSP
// TODO: Check for two levels of flops, instead of just one
code argQ clock AREG
AREG = -1;
if (next && next->type.in(\DSP48E1)) {
Cell *prev = std::get<0>(chain.back());
if (param(next, \A_INPUT).decode_string() == "DIRECT" &&
port(next, \ACIN, SigSpec()).is_fully_zero() &&
nusers(port(prev, \ACOUT, SigSpec())) <= 1) {
if (param(prev, \AREG) == 0) {
if (port(prev, \A) == port(next, \A))
AREG = 0;
}
else {
argQ = unextend(port(next, \A));
clock = port(prev, \CLK);
subpattern(in_dffe);
if (dff) {
if (!dff->type.in($sdff, $sdffe) && port(prev, \RSTA, State::S0) != State::S0)
goto reject_AREG;
if (dff->type.in($sdff, $sdffe) && (port(dff, \SRST) != port(prev, \RSTA, State::S0) || !param(dff, \SRST_POLARITY).as_bool()))
goto reject_AREG;
IdString CEA;
if (param(prev, \AREG) == 1)
CEA = \CEA2;
else if (param(prev, \AREG) == 2)
CEA = \CEA1;
else log_abort();
if (!dff->type.in($dffe, $sdffe) && port(prev, CEA, State::S0) != State::S1)
goto reject_AREG;
if (dff->type.in($dffe, $sdffe) && (port(dff, \EN) != port(prev, CEA, State::S0) || !param(dff, \EN_POLARITY).as_bool()))
goto reject_AREG;
if (dffD == unextend(port(prev, \A)))
AREG = 1;
}
}
}
reject_AREG: ;
}
endcode
// (4) Same as (3) but for BCOUT -> BCIN cascade
code argQ clock BREG
BREG = -1;
if (next) {
Cell *prev = std::get<0>(chain.back());
if ((next->type != \DSP48E1 || param(next, \B_INPUT).decode_string() == "DIRECT") &&
port(next, \BCIN, SigSpec()).is_fully_zero() &&
nusers(port(prev, \BCOUT, SigSpec())) <= 1) {
if ((next->type.in(\DSP48A, \DSP48A1) && param(prev, \B0REG) == 0 && param(prev, \B1REG) == 0) ||
(next->type.in(\DSP48E1) && param(prev, \BREG) == 0)) {
if (port(prev, \B) == port(next, \B))
BREG = 0;
}
else {
argQ = unextend(port(next, \B));
clock = port(prev, \CLK);
subpattern(in_dffe);
if (dff) {
if (!dff->type.in($sdff, $sdffe) && port(prev, \RSTB, State::S0) != State::S0)
goto reject_BREG;
if (dff->type.in($sdff, $sdffe) && (port(dff, \SRST) != port(prev, \RSTB, State::S0) || !param(dff, \SRST_POLARITY).as_bool()))
goto reject_BREG;
IdString CEB;
if (next->type.in(\DSP48A, \DSP48A1))
CEB = \CEB;
else if (next->type.in(\DSP48E1)) {
if (param(prev, \BREG) == 1)
CEB = \CEB2;
else if (param(prev, \BREG) == 2)
CEB = \CEB1;
else log_abort();
}
else log_abort();
if (!dff->type.in($dffe, $sdffe) && port(prev, CEB, State::S0) != State::S1)
goto reject_BREG;
if (dff->type.in($dffe, $sdffe) && (port(dff, \EN) != port(prev, CEB, State::S0) || !param(dff, \EN_POLARITY).as_bool()))
goto reject_BREG;
if (dffD == unextend(port(prev, \B))) {
if (next->type.in(\DSP48A, \DSP48A1) && param(prev, \B0REG) != 0)
goto reject_BREG;
BREG = 1;
}
}
}
}
reject_BREG: ;
}
endcode
// (5) Recursively go to (2.1) until no more matches possible, recording the
// longest possible chain
code
if (next) {
chain.emplace_back(next, nextP_shift17 ? 17 : nextP ? 0 : -1, AREG, BREG);
SigSpec sigC = unextend(port(next, \C));
if (nextP_shift17) {
if (GetSize(sigC)+17 <= GetSize(port(std::get<0>(chain.back()), \P)) &&
port(std::get<0>(chain.back()), \P).extract(17, GetSize(sigC)) != sigC)
subpattern(tail);
}
else {
if (GetSize(sigC) <= GetSize(port(std::get<0>(chain.back()), \P)) &&
port(std::get<0>(chain.back()), \P).extract(0, GetSize(sigC)) != sigC)
subpattern(tail);
}
} else {
if (GetSize(chain) > GetSize(longest_chain))
longest_chain = chain;
}
finally
if (next)
chain.pop_back();
endcode
// #######################
// Subpattern for matching against input registers, based on knowledge of the
// 'Q' input.
subpattern in_dffe
arg argQ clock
code
dff = nullptr;
if (argQ.empty())
reject;
for (const auto &c : argQ.chunks()) {
// Abandon matches when 'Q' is a constant
if (!c.wire)
reject;
// Abandon matches when 'Q' has the keep attribute set
if (c.wire->get_bool_attribute(\keep))
reject;
// Abandon matches when 'Q' has a non-zero init attribute set
// (not supported by DSP48E1)
Const init = c.wire->attributes.at(\init, Const());
if (!init.empty())
for (auto b : init.extract(c.offset, c.width))
if (b != State::Sx && b != State::S0)
reject;
}
endcode
match ff
select ff->type.in($dff, $dffe, $sdff, $sdffe)
// DSP48E1 does not support clock inversion
select param(ff, \CLK_POLARITY).as_bool()
// Check that reset value, if present, is fully 0.
filter ff->type.in($dff, $dffe) || param(ff, \SRST_VALUE).is_fully_zero()
slice offset GetSize(port(ff, \D))
index <SigBit> port(ff, \Q)[offset] === argQ[0]
// Check that the rest of argQ is present
filter GetSize(port(ff, \Q)) >= offset + GetSize(argQ)
filter port(ff, \Q).extract(offset, GetSize(argQ)) == argQ
filter clock == SigBit() || port(ff, \CLK)[0] == clock
endmatch
code argQ
SigSpec Q = port(ff, \Q);
dff = ff;
dffclock = port(ff, \CLK);
dffD = argQ;
SigSpec D = port(ff, \D);
argQ = Q;
dffD.replace(argQ, D);
endcode

View file

@ -0,0 +1,258 @@
/*
* yosys -- Yosys Open SYnthesis Suite
*
* Copyright (C) 2012 Claire Xenia Wolf <claire@yosyshq.com>
* (C) 2019 Eddie Hung <eddie@fpgeh.com>
*
* Permission to use, copy, modify, and/or distribute this software for any
* purpose with or without fee is hereby granted, provided that the above
* copyright notice and this permission notice appear in all copies.
*
* THE SOFTWARE IS PROVIDED "AS IS" AND THE AUTHOR DISCLAIMS ALL WARRANTIES
* WITH REGARD TO THIS SOFTWARE INCLUDING ALL IMPLIED WARRANTIES OF
* MERCHANTABILITY AND FITNESS. IN NO EVENT SHALL THE AUTHOR BE LIABLE FOR
* ANY SPECIAL, DIRECT, INDIRECT, OR CONSEQUENTIAL DAMAGES OR ANY DAMAGES
* WHATSOEVER RESULTING FROM LOSS OF USE, DATA OR PROFITS, WHETHER IN AN
* ACTION OF CONTRACT, NEGLIGENCE OR OTHER TORTIOUS ACTION, ARISING OUT OF
* OR IN CONNECTION WITH THE USE OR PERFORMANCE OF THIS SOFTWARE.
*
*/
#include "kernel/yosys.h"
#include "kernel/sigtools.h"
USING_YOSYS_NAMESPACE
PRIVATE_NAMESPACE_BEGIN
#include "techlibs/xilinx/xilinx_srl_pm.h"
void run_fixed(xilinx_srl_pm &pm)
{
auto &st = pm.st_fixed;
auto &ud = pm.ud_fixed;
log("Found fixed chain of length %d (%s):\n", GetSize(ud.longest_chain), log_id(st.first->type));
SigSpec initval;
for (auto cell : ud.longest_chain) {
log_debug(" %s\n", log_id(cell));
if (cell->type.in(ID($_DFF_N_), ID($_DFF_P_), ID($_DFFE_NN_), ID($_DFFE_NP_), ID($_DFFE_PN_), ID($_DFFE_PP_))) {
SigBit Q = cell->getPort(ID::Q);
log_assert(Q.wire);
auto it = Q.wire->attributes.find(ID::init);
if (it != Q.wire->attributes.end()) {
auto &i = it->second.bits()[Q.offset];
initval.append(i);
i = State::Sx;
}
else
initval.append(State::Sx);
}
else if (cell->type.in(ID(FDRE), ID(FDRE_1))) {
if (cell->getParam(ID::INIT).as_bool())
initval.append(State::S1);
else
initval.append(State::S0);
}
else
log_abort();
pm.autoremove(cell);
}
auto first_cell = ud.longest_chain.back();
auto last_cell = ud.longest_chain.front();
Cell *c = pm.module->addCell(NEW_ID, ID($__XILINX_SHREG_));
pm.module->swap_names(c, first_cell);
if (first_cell->type.in(ID($_DFF_N_), ID($_DFF_P_), ID($_DFFE_NN_), ID($_DFFE_NP_), ID($_DFFE_PN_), ID($_DFFE_PP_), ID(FDRE), ID(FDRE_1))) {
c->setParam(ID::DEPTH, GetSize(ud.longest_chain));
c->setParam(ID::INIT, initval.as_const());
if (first_cell->type.in(ID($_DFF_P_), ID($_DFFE_PN_), ID($_DFFE_PP_)))
c->setParam(ID(CLKPOL), 1);
else if (first_cell->type.in(ID($_DFF_N_), ID($_DFFE_NN_), ID($_DFFE_NP_), ID(FDRE_1)))
c->setParam(ID(CLKPOL), 0);
else if (first_cell->type.in(ID(FDRE))) {
if (!first_cell->getParam(ID(IS_C_INVERTED)).as_bool())
c->setParam(ID(CLKPOL), 1);
else
c->setParam(ID(CLKPOL), 0);
}
else
log_abort();
if (first_cell->type.in(ID($_DFFE_NP_), ID($_DFFE_PP_)))
c->setParam(ID(ENPOL), 1);
else if (first_cell->type.in(ID($_DFFE_NN_), ID($_DFFE_PN_)))
c->setParam(ID(ENPOL), 0);
else
c->setParam(ID(ENPOL), 2);
c->setPort(ID::C, first_cell->getPort(ID::C));
c->setPort(ID::D, first_cell->getPort(ID::D));
c->setPort(ID::Q, last_cell->getPort(ID::Q));
c->setPort(ID::L, GetSize(ud.longest_chain)-1);
if (first_cell->type.in(ID($_DFF_N_), ID($_DFF_P_)))
c->setPort(ID::E, State::S1);
else if (first_cell->type.in(ID($_DFFE_NN_), ID($_DFFE_NP_), ID($_DFFE_PN_), ID($_DFFE_PP_)))
c->setPort(ID::E, first_cell->getPort(ID::E));
else if (first_cell->type.in(ID(FDRE), ID(FDRE_1)))
c->setPort(ID::E, first_cell->getPort(ID(CE)));
else
log_abort();
}
else
log_abort();
log(" -> %s (%s)\n", log_id(c), log_id(c->type));
}
void run_variable(xilinx_srl_pm &pm)
{
auto &st = pm.st_variable;
auto &ud = pm.ud_variable;
log("Found variable chain of length %d (%s):\n", GetSize(ud.chain), log_id(st.first->type));
SigSpec initval;
for (const auto &i : ud.chain) {
auto cell = i.first;
auto slice = i.second;
log_debug(" %s\n", log_id(cell));
if (cell->type.in(ID($_DFF_N_), ID($_DFF_P_), ID($_DFFE_NN_), ID($_DFFE_NP_), ID($_DFFE_PN_), ID($_DFFE_PP_), ID($dff), ID($dffe))) {
SigBit Q = cell->getPort(ID::Q)[slice];
log_assert(Q.wire);
auto it = Q.wire->attributes.find(ID::init);
if (it != Q.wire->attributes.end()) {
auto &i = it->second.bits()[Q.offset];
initval.append(i);
i = State::Sx;
}
else
initval.append(State::Sx);
}
else
log_abort();
}
pm.autoremove(st.shiftx);
auto first_cell = ud.chain.back().first;
auto first_slice = ud.chain.back().second;
Cell *c = pm.module->addCell(NEW_ID, ID($__XILINX_SHREG_));
pm.module->swap_names(c, first_cell);
if (first_cell->type.in(ID($_DFF_N_), ID($_DFF_P_), ID($_DFFE_NN_), ID($_DFFE_NP_), ID($_DFFE_PN_), ID($_DFFE_PP_), ID($dff), ID($dffe))) {
c->setParam(ID::DEPTH, GetSize(ud.chain));
c->setParam(ID::INIT, initval.as_const());
Const clkpol, enpol;
if (first_cell->type.in(ID($_DFF_P_), ID($_DFFE_PN_), ID($_DFFE_PP_)))
clkpol = 1;
else if (first_cell->type.in(ID($_DFF_N_), ID($_DFFE_NN_), ID($_DFFE_NP_)))
clkpol = 0;
else if (first_cell->type.in(ID($dff), ID($dffe)))
clkpol = first_cell->getParam(ID::CLK_POLARITY);
else
log_abort();
if (first_cell->type.in(ID($_DFFE_NP_), ID($_DFFE_PP_)))
enpol = 1;
else if (first_cell->type.in(ID($_DFFE_NN_), ID($_DFFE_PN_)))
enpol = 0;
else if (first_cell->type.in(ID($dffe)))
enpol = first_cell->getParam(ID::EN_POLARITY);
else
enpol = 2;
c->setParam(ID(CLKPOL), clkpol);
c->setParam(ID(ENPOL), enpol);
if (first_cell->type.in(ID($_DFF_N_), ID($_DFF_P_), ID($_DFFE_NN_), ID($_DFFE_NP_), ID($_DFFE_PN_), ID($_DFFE_PP_)))
c->setPort(ID::C, first_cell->getPort(ID::C));
else if (first_cell->type.in(ID($dff), ID($dffe)))
c->setPort(ID::C, first_cell->getPort(ID::CLK));
else
log_abort();
c->setPort(ID::D, first_cell->getPort(ID::D)[first_slice]);
c->setPort(ID::Q, st.shiftx->getPort(ID::Y));
c->setPort(ID::L, st.shiftx->getPort(ID::B));
if (first_cell->type.in(ID($_DFF_N_), ID($_DFF_P_), ID($dff)))
c->setPort(ID::E, State::S1);
else if (first_cell->type.in(ID($_DFFE_NN_), ID($_DFFE_NP_), ID($_DFFE_PN_), ID($_DFFE_PP_)))
c->setPort(ID::E, first_cell->getPort(ID::E));
else if (first_cell->type.in(ID($dffe)))
c->setPort(ID::E, first_cell->getPort(ID::EN));
else
log_abort();
}
else
log_abort();
log(" -> %s (%s)\n", log_id(c), log_id(c->type));
}
struct XilinxSrlPass : public Pass {
XilinxSrlPass() : Pass("xilinx_srl", "Xilinx shift register extraction") { }
void help() override
{
// |---v---|---v---|---v---|---v---|---v---|---v---|---v---|---v---|---v---|---v---|
log("\n");
log(" xilinx_srl [options] [selection]\n");
log("\n");
log("This pass converts chains of built-in flops (bit-level: $_DFF_[NP]_, $_DFFE_*\n");
log("and word-level: $dff, $dffe) as well as Xilinx flops (FDRE, FDRE_1) into a\n");
log("$__XILINX_SHREG cell. Chains must be of the same cell type, clock, clock\n");
log("polarity, enable, and enable polarity (where relevant).\n");
log("Flops with resets cannot be mapped to Xilinx devices and will not be inferred.\n");
log("\n");
log(" -minlen N\n");
log(" min length of shift register (default = 3)\n");
log("\n");
log(" -fixed\n");
log(" infer fixed-length shift registers.\n");
log("\n");
log(" -variable\n");
log(" infer variable-length shift registers (i.e. fixed-length shifts where\n");
log(" each element also fans-out to a $shiftx cell).\n");
log("\n");
}
void execute(std::vector<std::string> args, RTLIL::Design *design) override
{
log_header(design, "Executing XILINX_SRL pass (Xilinx shift register extraction).\n");
bool fixed = false;
bool variable = false;
int minlen = 3;
size_t argidx;
for (argidx = 1; argidx < args.size(); argidx++)
{
if (args[argidx] == "-minlen" && argidx+1 < args.size()) {
minlen = atoi(args[++argidx].c_str());
continue;
}
if (args[argidx] == "-fixed") {
fixed = true;
continue;
}
if (args[argidx] == "-variable") {
variable = true;
continue;
}
break;
}
extra_args(args, argidx, design);
if (!fixed && !variable)
log_cmd_error("'-fixed' and/or '-variable' must be specified.\n");
for (auto module : design->selected_modules()) {
auto pm = xilinx_srl_pm(module, module->selected_cells());
pm.ud_fixed.minlen = minlen;
pm.ud_variable.minlen = minlen;
if (fixed)
pm.run_fixed(run_fixed);
if (variable)
pm.run_variable(run_variable);
}
}
} XilinxSrlPass;
PRIVATE_NAMESPACE_END

View file

@ -0,0 +1,326 @@
pattern fixed
state <IdString> clk_port en_port
udata <vector<Cell*>> chain longest_chain
udata <pool<Cell*>> non_first_cells
udata <int> minlen
code
non_first_cells.clear();
subpattern(setup);
endcode
match first
select first->type.in($_DFF_N_, $_DFF_P_, $_DFFE_NN_, $_DFFE_NP_, $_DFFE_PN_, $_DFFE_PP_, \FDRE, \FDRE_1)
select !first->has_keep_attr()
select !first->type.in(\FDRE) || !param(first, \IS_R_INVERTED).as_bool()
select !first->type.in(\FDRE) || !param(first, \IS_D_INVERTED).as_bool()
select !first->type.in(\FDRE, \FDRE_1) || port(first, \R, State::S0).is_fully_zero()
filter !non_first_cells.count(first)
generate
SigSpec C = module->addWire(NEW_ID);
SigSpec D = module->addWire(NEW_ID);
SigSpec Q = module->addWire(NEW_ID);
auto r = rng(8);
Cell* cell;
switch (r)
{
case 0:
case 1:
cell = module->addCell(NEW_ID, \FDRE);
cell->setPort(\C, C);
cell->setPort(\D, D);
cell->setPort(\Q, Q);
cell->setPort(\CE, module->addWire(NEW_ID));
if (r & 1)
cell->setPort(\R, module->addWire(NEW_ID));
else {
if (rng(2) == 0)
cell->setPort(\R, State::S0);
}
break;
case 2:
case 3:
cell = module->addDffGate(NEW_ID, C, D, Q, r & 1);
break;
case 4:
case 5:
case 6:
case 7:
cell = module->addDffeGate(NEW_ID, C, module->addWire(NEW_ID), D, Q, r & 1, r & 2);
break;
default: log_abort();
}
endmatch
code clk_port en_port
if (first->type.in($_DFF_N_, $_DFF_P_, $_DFFE_NN_, $_DFFE_NP_, $_DFFE_PN_, $_DFFE_PP_, \FDRE, \FDRE_1))
clk_port = \C;
else log_abort();
if (first->type.in($_DFF_N_, $_DFF_P_))
en_port = IdString();
else if (first->type.in($_DFFE_NN_, $_DFFE_NP_, $_DFFE_PN_, $_DFFE_PP_))
en_port = \E;
else if (first->type.in(\FDRE, \FDRE_1))
en_port = \CE;
else log_abort();
longest_chain.clear();
chain.push_back(first);
subpattern(tail);
finally
chain.pop_back();
log_assert(chain.empty());
if (GetSize(longest_chain) >= minlen)
accept;
endcode
// ------------------------------------------------------------------
subpattern setup
arg clk_port
arg en_port
match first
select first->type.in($_DFF_N_, $_DFF_P_, $_DFFE_NN_, $_DFFE_NP_, $_DFFE_PN_, $_DFFE_PP_, \FDRE, \FDRE_1)
select !first->has_keep_attr()
select !first->type.in(\FDRE) || !param(first, \IS_R_INVERTED).as_bool()
select !first->type.in(\FDRE) || !param(first, \IS_D_INVERTED).as_bool()
select !first->type.in(\FDRE, \FDRE_1) || port(first, \R, State::S0).is_fully_zero()
endmatch
code clk_port en_port
if (first->type.in($_DFF_N_, $_DFF_P_, $_DFFE_NN_, $_DFFE_NP_, $_DFFE_PN_, $_DFFE_PP_, \FDRE, \FDRE_1))
clk_port = \C;
else log_abort();
if (first->type.in($_DFF_N_, $_DFF_P_))
en_port = IdString();
else if (first->type.in($_DFFE_NN_, $_DFFE_NP_, $_DFFE_PN_, $_DFFE_PP_))
en_port = \E;
else if (first->type.in(\FDRE, \FDRE_1))
en_port = \CE;
else log_abort();
endcode
match next
select next->type.in($_DFF_N_, $_DFF_P_, $_DFFE_NN_, $_DFFE_NP_, $_DFFE_PN_, $_DFFE_PP_, \FDRE, \FDRE_1)
select !next->has_keep_attr()
select port(next, \D)[0].wire && !port(next, \D)[0].wire->get_bool_attribute(\keep)
select nusers(port(next, \Q)) == 2
index <IdString> next->type === first->type
index <SigBit> port(next, \Q) === port(first, \D)
filter port(next, clk_port) == port(first, clk_port)
filter en_port == IdString() || port(next, en_port) == port(first, en_port)
filter !first->type.in(\FDRE) || param(next, \IS_C_INVERTED).as_bool() == param(first, \IS_C_INVERTED).as_bool()
filter !first->type.in(\FDRE) || param(next, \IS_D_INVERTED).as_bool() == param(first, \IS_D_INVERTED).as_bool()
filter !first->type.in(\FDRE) || param(next, \IS_R_INVERTED).as_bool() == param(first, \IS_R_INVERTED).as_bool()
filter !first->type.in(\FDRE, \FDRE_1) || port(next, \R, State::S0).is_fully_zero()
endmatch
code
non_first_cells.insert(next);
endcode
// ------------------------------------------------------------------
subpattern tail
arg first
arg clk_port
arg en_port
match next
semioptional
select next->type.in($_DFF_N_, $_DFF_P_, $_DFFE_NN_, $_DFFE_NP_, $_DFFE_PN_, $_DFFE_PP_, \FDRE, \FDRE_1)
select !next->has_keep_attr()
select port(next, \D)[0].wire && !port(next, \D)[0].wire->get_bool_attribute(\keep)
select nusers(port(next, \Q)) == 2
index <IdString> next->type === chain.back()->type
index <SigBit> port(next, \Q) === port(chain.back(), \D)
filter port(next, clk_port) == port(first, clk_port)
filter en_port == IdString() || port(next, en_port) == port(first, en_port)
filter !first->type.in(\FDRE) || param(next, \IS_C_INVERTED).as_bool() == param(first, \IS_C_INVERTED).as_bool()
filter !first->type.in(\FDRE) || param(next, \IS_D_INVERTED).as_bool() == param(first, \IS_D_INVERTED).as_bool()
filter !first->type.in(\FDRE) || param(next, \IS_R_INVERTED).as_bool() == param(first, \IS_R_INVERTED).as_bool()
filter !first->type.in(\FDRE, \FDRE_1) || port(next, \R, State::S0).is_fully_zero()
generate
Cell *cell = module->addCell(NEW_ID, chain.back()->type);
cell->setPort(\C, chain.back()->getPort(\C));
cell->setPort(\D, module->addWire(NEW_ID));
cell->setPort(\Q, chain.back()->getPort(\D));
if (cell->type == \FDRE) {
if (rng(2) == 0)
cell->setPort(\R, port(chain.back(), \R, State::S0));
cell->setPort(\CE, chain.back()->getPort(\CE));
}
else if (cell->type.begins_with("$_DFFE_"))
cell->setPort(\E, chain.back()->getPort(\E));
endmatch
code
if (next) {
chain.push_back(next);
subpattern(tail);
} else {
if (GetSize(chain) > GetSize(longest_chain))
longest_chain = chain;
}
finally
if (next)
chain.pop_back();
endcode
// -----------
pattern variable
state <IdString> clk_port en_port
state <int> shiftx_width
state <int> slice
udata <int> minlen
udata <vector<pair<Cell*,int>>> chain
udata <pool<SigBit>> chain_bits
code
chain_bits.clear();
endcode
match shiftx
select shiftx->type.in($shiftx)
select !shiftx->has_keep_attr()
select param(shiftx, \Y_WIDTH).as_int() == 1
filter param(shiftx, \A_WIDTH).as_int() >= minlen
generate
minlen = 3;
module->addShiftx(NEW_ID, module->addWire(NEW_ID, rng(6)+minlen), module->addWire(NEW_ID, 3), module->addWire(NEW_ID));
endmatch
code shiftx_width
shiftx_width = param(shiftx, \A_WIDTH).as_int();
endcode
match first
select first->type.in($_DFF_N_, $_DFF_P_, $_DFFE_NN_, $_DFFE_NP_, $_DFFE_PN_, $_DFFE_PP_, $dff, $dffe)
select !first->has_keep_attr()
select port(first, \Q)[0].wire && !port(first, \Q)[0].wire->get_bool_attribute(\keep)
slice idx GetSize(port(first, \Q))
select nusers(port(first, \Q)[idx]) <= 2
index <SigBit> port(first, \Q)[idx] === port(shiftx, \A)[shiftx_width-1]
set slice idx
generate
SigSpec C = module->addWire(NEW_ID);
auto WIDTH = rng(3)+1;
SigSpec D = module->addWire(NEW_ID, WIDTH);
SigSpec Q = module->addWire(NEW_ID, WIDTH);
auto r = rng(8);
Cell *cell = nullptr;
switch (r)
{
case 0:
case 1:
cell = module->addDff(NEW_ID, C, D, Q, r & 1);
break;
case 2:
case 3:
case 4:
case 5:
//cell = module->addDffe(NEW_ID, C, module->addWire(NEW_ID), D, Q, r & 1, r & 4);
//break;
case 6:
case 7:
WIDTH = 1;
cell = module->addDffGate(NEW_ID, C, D[0], Q[0], r & 1);
break;
default: log_abort();
}
shiftx->connections_.at(\A)[shiftx_width-1] = port(cell, \Q)[rng(WIDTH)];
endmatch
code clk_port en_port
if (first->type.in($_DFF_N_, $_DFF_P_, $_DFFE_NN_, $_DFFE_NP_, $_DFFE_PN_, $_DFFE_PP_))
clk_port = \C;
else if (first->type.in($dff, $dffe))
clk_port = \CLK;
else log_abort();
if (first->type.in($_DFF_N_, $_DFF_P_, $dff))
en_port = IdString();
else if (first->type.in($_DFFE_NN_, $_DFFE_NP_, $_DFFE_PN_, $_DFFE_PP_))
en_port = \E;
else if (first->type.in($dffe))
en_port = \EN;
else log_abort();
chain_bits.insert(port(first, \Q)[slice]);
chain.emplace_back(first, slice);
subpattern(tail);
finally
if (GetSize(chain) == shiftx_width)
accept;
chain.clear();
endcode
// ------------------------------------------------------------------
subpattern tail
arg first
arg shiftx
arg shiftx_width
arg slice
arg clk_port
arg en_port
match next
semioptional
select next->type.in($_DFF_N_, $_DFF_P_, $_DFFE_NN_, $_DFFE_NP_, $_DFFE_PN_, $_DFFE_PP_, $dff, $dffe)
select !next->has_keep_attr()
select port(next, \D)[0].wire && !port(next, \D)[0].wire->get_bool_attribute(\keep)
slice idx GetSize(port(next, \Q))
select nusers(port(next, \Q)[idx]) <= 3
index <IdString> next->type === chain.back().first->type
index <SigBit> port(next, \Q)[idx] === port(chain.back().first, \D)[chain.back().second]
index <SigBit> port(next, \Q)[idx] === port(shiftx, \A)[shiftx_width-1-GetSize(chain)]
filter port(next, clk_port) == port(first, clk_port)
filter en_port == IdString() || port(next, en_port) == port(first, en_port)
filter !next->type.in($dff, $dffe) || param(next, \CLK_POLARITY).as_bool() == param(first, \CLK_POLARITY).as_bool()
filter !next->type.in($dffe) || param(next, \EN_POLARITY).as_bool() == param(first, \EN_POLARITY).as_bool()
filter !chain_bits.count(port(next, \D)[idx])
set slice idx
generate
if (GetSize(chain) < shiftx_width) {
auto back = chain.back().first;
auto slice = chain.back().second;
if (back->type.in($dff, $dffe)) {
auto WIDTH = GetSize(port(back, \D));
if (rng(2) == 0 && slice < WIDTH-1) {
auto new_slice = slice + rng(WIDTH-1-slice);
back->connections_.at(\D)[slice] = port(back, \Q)[new_slice];
}
else {
auto D = module->addWire(NEW_ID, WIDTH);
if (back->type == $dff)
module->addDff(NEW_ID, port(back, \CLK), D, port(back, \D), param(back, \CLK_POLARITY).as_bool());
else if (back->type == $dffe)
module->addDffe(NEW_ID, port(back, \CLK), port(back, \EN), D, port(back, \D), param(back, \CLK_POLARITY).as_bool(), param(back, \EN_POLARITY).as_bool());
else
log_abort();
}
}
else if (back->type.begins_with("$_DFF_")) {
Cell *cell = module->addCell(NEW_ID, back->type);
cell->setPort(\C, back->getPort(\C));
cell->setPort(\D, module->addWire(NEW_ID));
cell->setPort(\Q, back->getPort(\D));
}
else
log_abort();
shiftx->connections_.at(\A)[shiftx_width-1-GetSize(chain)] = port(back, \D)[slice];
}
endmatch
code
if (next) {
chain_bits.insert(port(next, \Q)[slice]);
chain.emplace_back(next, slice);
if (GetSize(chain) < shiftx_width)
subpattern(tail);
}
endcode