3
0
Fork 0
mirror of https://github.com/YosysHQ/yosys synced 2025-06-14 01:46:16 +00:00

Add comments for xilinx_dsp

This commit is contained in:
Eddie Hung 2019-10-04 12:40:34 -07:00
parent b47bb5c810
commit cf82b38478
3 changed files with 134 additions and 6 deletions

View file

@ -608,8 +608,13 @@ struct XilinxDspPass : public Pass {
extra_args(args, argidx, design); extra_args(args, argidx, design);
for (auto module : design->selected_modules()) { for (auto module : design->selected_modules()) {
// Experimental feature: pack $add/$sub cells with
// (* use_dsp48="simd" *) into DSP48E1's using its
// SIMD feature
xilinx_simd_pack(module, module->selected_cells()); xilinx_simd_pack(module, module->selected_cells());
// Match for all features ([ABDMP][12]?REG, pre-adder,
// (post-adder, pattern detector, etc.) except for CREG
{ {
xilinx_dsp_pm pm(module, module->selected_cells()); xilinx_dsp_pm pm(module, module->selected_cells());
pm.run_xilinx_dsp_pack(xilinx_dsp_pack); pm.run_xilinx_dsp_pack(xilinx_dsp_pack);
@ -618,14 +623,17 @@ struct XilinxDspPass : public Pass {
// is no guarantee that the cell ordering corresponds // is no guarantee that the cell ordering corresponds
// to the "expected" case (i.e. the order in which // to the "expected" case (i.e. the order in which
// they appear in the source) thus the possiblity // they appear in the source) thus the possiblity
// existed that a register got packed as CREG into a // existed that a register got packed as a CREG into a
// downstream DSP that should have otherwise been a // downstream DSP that should have otherwise been a
// PREG of an upstream DSP that had not been pattern // PREG of an upstream DSP that had not been visited
// matched yet // yet
{ {
xilinx_dsp_CREG_pm pm(module, module->selected_cells()); xilinx_dsp_CREG_pm pm(module, module->selected_cells());
pm.run_xilinx_dsp_packC(xilinx_dsp_packC); pm.run_xilinx_dsp_packC(xilinx_dsp_packC);
} }
// Lastly, identify and utilise PCOUT -> PCIN,
// ACOUT -> ACIN, and BCOUT-> BCIN dedicated cascade
// chains
{ {
xilinx_dsp_cascade_pm pm(module, module->selected_cells()); xilinx_dsp_cascade_pm pm(module, module->selected_cells());
pm.run_xilinx_dsp_cascade(); pm.run_xilinx_dsp_cascade();

View file

@ -1,3 +1,53 @@
// This file describes the main pattern matcher setup (of three total) that
// forms the `xilinx_dsp` pass described in xilinx_dsp.cc
// At a high level, it works as follows:
// ( 1) Starting from a DSP48E1 cell
// ( 2) Match the driver of the 'A' input to a possible $dff cell (ADREG)
// (attached to at most two $mux cells that implement clock-enable or
// reset functionality, using a subpattern discussed below)
// If ADREG matched, treat 'A' input as input of ADREG
// ( 3) Match the driver of the 'A' and 'D' inputs for a possible $add cell
// (pre-adder)
// ( 4) If pre-adder was present, find match 'A' input for A2REG
// If pre-adder was not present, move ADREG to A2REG
// If A2REG, then match 'A' input for A1REG
// ( 5) Match 'B' input for B2REG
// If B2REG, then match 'B' input for B1REG
// ( 6) Match 'D' input for DREG
// ( 7) Match 'P' output that exclusively drives an MREG
// ( 8) Match 'P' output that exclusively drives one of two inputs to an $add
// cell (post-adder).
// The other input to the adder is assumed to come in from the 'C' input
// (note: 'P' -> 'C' connections that exist for accumulators are
// recognised in xilinx_dsp.cc).
// ( 9) Match 'P' output that exclusively drives a PREG
// (10) If post-adder and PREG both present, match for a $mux cell driving
// the 'C' input, where one of the $mux's inputs is the PREG output.
// This indicates an accumulator situation, and one where a $mux exists
// to override the accumulated value:
// +--------------------------------+
// | ____ |
// +--| \ |
// |$mux|-+ |
// 'C' ---|____/ | |
// | /-------\ +----+ |
// +----+ +-| post- |___|PREG|---+ 'P'
// |MREG|------ | adder | +----+
// +----+ \-------/
// (11) If PREG present, match for a greater-than-or-equal $ge cell attached
// to the 'P' output where it is compared to a constant that is a
// power-of-2: e.g. `assign overflow = (PREG >= 2**40);`
// In this scenario, the pattern detector functionality of a DSP48E1 can
// to implement this function
// Notes:
// - The intention of this pattern matcher is for it to be compatible with
// DSP48E1 cells inferred from multiply operations by Yosys, as well as for
// user instantiations that may already contain the cells being packed...
// (though the latter is currently untested)
// - Since the $dff-with-clock-enable-or-reset-mux pattern is used for each
// *REG match, it has been factored out into two subpatterns: in_dffe
// out_dffe located at the bottom of this file
pattern xilinx_dsp_pack pattern xilinx_dsp_pack
state <SigBit> clock state <SigBit> clock
@ -5,12 +55,11 @@ state <SigSpec> sigA sigB sigC sigD sigM sigP
state <IdString> postAddAB postAddMuxAB state <IdString> postAddAB postAddMuxAB
state <bool> ffA1cepol ffA2cepol ffADcepol ffB1cepol ffB2cepol ffDcepol ffMcepol ffPcepol state <bool> ffA1cepol ffA2cepol ffADcepol ffB1cepol ffB2cepol ffDcepol ffMcepol ffPcepol
state <bool> ffArstpol ffADrstpol ffBrstpol ffDrstpol ffMrstpol ffPrstpol state <bool> ffArstpol ffADrstpol ffBrstpol ffDrstpol ffMrstpol ffPrstpol
state <Cell*> ffAD ffADcemux ffADrstmux ffA1 ffA1cemux ffA1rstmux ffA2 ffA2cemux ffA2rstmux state <Cell*> ffAD ffADcemux ffADrstmux ffA1 ffA1cemux ffA1rstmux ffA2 ffA2cemux ffA2rstmux
state <Cell*> ffB1 ffB1cemux ffB1rstmux ffB2 ffB2cemux ffB2rstmux state <Cell*> ffB1 ffB1cemux ffB1rstmux ffB2 ffB2cemux ffB2rstmux
state <Cell*> ffD ffDcemux ffDrstmux ffM ffMcemux ffMrstmux ffP ffPcemux ffPrstmux state <Cell*> ffD ffDcemux ffDrstmux ffM ffMcemux ffMrstmux ffP ffPcemux ffPrstmux
// subpattern // Variables used for subpatterns
state <SigSpec> argQ argD state <SigSpec> argQ argD
state <bool> ffcepol ffrstpol state <bool> ffcepol ffrstpol
state <int> ffoffset state <int> ffoffset
@ -19,6 +68,7 @@ udata <SigBit> dffclock
udata <Cell*> dff dffcemux dffrstmux udata <Cell*> dff dffcemux dffrstmux
udata <bool> dffcepol dffrstpol udata <bool> dffcepol dffrstpol
// (1) Starting from a DSP48E1 cell
match dsp match dsp
select dsp->type.in(\DSP48E1) select dsp->type.in(\DSP48E1)
endmatch endmatch
@ -53,6 +103,7 @@ code sigA sigB sigC sigD sigM clock
} }
else else
sigM = P; sigM = P;
// TODO: Check if necessary
// This sigM could have no users if downstream $add // This sigM could have no users if downstream $add
// is narrower than $mul result, for example // is narrower than $mul result, for example
if (sigM.empty()) if (sigM.empty())
@ -61,6 +112,10 @@ code sigA sigB sigC sigD sigM clock
clock = port(dsp, \CLK, SigBit()); clock = port(dsp, \CLK, SigBit());
endcode endcode
// (2) Match the driver of the 'A' input to a possible $dff cell (ADREG)
// (attached to at most two $mux cells that implement clock-enable or
// reset functionality, using a subpattern discussed above)
// If matched, treat 'A' input as input of ADREG
code argQ ffAD ffADcemux ffADrstmux ffADcepol ffADrstpol sigA clock code argQ ffAD ffADcemux ffADrstmux ffADcepol ffADrstpol sigA clock
if (param(dsp, \ADREG).as_int() == 0) { if (param(dsp, \ADREG).as_int() == 0) {
argQ = sigA; argQ = sigA;
@ -81,6 +136,8 @@ code argQ ffAD ffADcemux ffADrstmux ffADcepol ffADrstpol sigA clock
} }
endcode endcode
// (3) Match the driver of the 'A' and 'D' inputs for a possible $add cell
// (pre-adder)
match preAdd match preAdd
if sigD.empty() || sigD.is_fully_zero() if sigD.empty() || sigD.is_fully_zero()
// Ensure that preAdder not already used // Ensure that preAdder not already used
@ -103,6 +160,7 @@ match preAdd
endmatch endmatch
code sigA sigD code sigA sigD
// TODO: Check if this is necessary?
if (preAdd) { if (preAdd) {
sigA = port(preAdd, \A); sigA = port(preAdd, \A);
sigD = port(preAdd, \B); sigD = port(preAdd, \B);
@ -111,6 +169,9 @@ code sigA sigD
} }
endcode endcode
// (4) If pre-adder was present, find match 'A' input for A2REG
// If pre-adder was not present, move ADREG to A2REG
// Then match 'A' input for A1REG
code argQ ffAD ffADcemux ffADrstmux ffADcepol ffADrstpol sigA clock ffA2 ffA2cemux ffA2rstmux ffA2cepol ffArstpol ffA1 ffA1cemux ffA1rstmux ffA1cepol code argQ ffAD ffADcemux ffADrstmux ffADcepol ffADrstpol sigA clock ffA2 ffA2cemux ffA2rstmux ffA2cepol ffArstpol ffA1 ffA1cemux ffA1rstmux ffA1cepol
// Only search for ffA2 if there was a pre-adder // Only search for ffA2 if there was a pre-adder
// (otherwise ffA2 would have been matched as ffAD) // (otherwise ffA2 would have been matched as ffAD)
@ -173,6 +234,8 @@ ffA1_end: ;
} }
endcode endcode
// (5) Match 'B' input for B2REG
// If B2REG, then match 'B' input for B1REG
code argQ ffB2 ffB2cemux ffB2rstmux ffB2cepol ffBrstpol sigB clock ffB1 ffB1cemux ffB1rstmux ffB1cepol code argQ ffB2 ffB2cemux ffB2rstmux ffB2cepol ffBrstpol sigB clock ffB1 ffB1cemux ffB1rstmux ffB1cepol
if (param(dsp, \BREG).as_int() == 0) { if (param(dsp, \BREG).as_int() == 0) {
argQ = sigB; argQ = sigB;
@ -222,6 +285,7 @@ ffB1_end: ;
} }
endcode endcode
// (6) Match 'D' input for DREG
code argQ ffD ffDcemux ffDrstmux ffDcepol ffDrstpol sigD clock code argQ ffD ffDcemux ffDrstmux ffDcepol ffDrstpol sigD clock
if (param(dsp, \DREG).as_int() == 0) { if (param(dsp, \DREG).as_int() == 0) {
argQ = sigD; argQ = sigD;
@ -242,6 +306,7 @@ code argQ ffD ffDcemux ffDrstmux ffDcepol ffDrstpol sigD clock
} }
endcode endcode
// (7) Match 'P' output that exclusively drives an MREG
code argD ffM ffMcemux ffMrstmux ffMcepol ffMrstpol sigM sigP clock code argD ffM ffMcemux ffMrstmux ffMcepol ffMrstpol sigM sigP clock
if (param(dsp, \MREG).as_int() == 0 && nusers(sigM) == 2) { if (param(dsp, \MREG).as_int() == 0 && nusers(sigM) == 2) {
argD = sigM; argD = sigM;
@ -263,6 +328,11 @@ code argD ffM ffMcemux ffMrstmux ffMcepol ffMrstpol sigM sigP clock
sigP = sigM; sigP = sigM;
endcode endcode
// (8) Match 'P' output that exclusively drives one of two inputs to an $add
// cell (post-adder).
// The other input to the adder is assumed to come in from the 'C' input
// (note: 'P' -> 'C' connections that exist for accumulators are
// recognised in xilinx_dsp.cc).
match postAdd match postAdd
// Ensure that Z mux is not already used // Ensure that Z mux is not already used
if port(dsp, \OPMODE, SigSpec()).extract(4,3).is_fully_zero() if port(dsp, \OPMODE, SigSpec()).extract(4,3).is_fully_zero()
@ -291,6 +361,7 @@ code sigC sigP
} }
endcode endcode
// (9) Match 'P' output that exclusively drives a PREG
code argD ffP ffPcemux ffPrstmux ffPcepol ffPrstpol sigP clock code argD ffP ffPcemux ffPrstmux ffPcepol ffPrstpol sigP clock
if (param(dsp, \PREG).as_int() == 0) { if (param(dsp, \PREG).as_int() == 0) {
int users = 2; int users = 2;
@ -316,6 +387,19 @@ code argD ffP ffPcemux ffPrstmux ffPcepol ffPrstpol sigP clock
} }
endcode endcode
// (10) If post-adder and PREG both present, match for a $mux cell driving
// the 'C' input, where one of the $mux's inputs is the PREG output.
// This indicates an accumulator situation, and one where a $mux exists
// to override the accumulated value:
// +--------------------------------+
// | ____ |
// +--| \ |
// |$mux|-+ |
// 'C' ---|____/ | |
// | /-------\ +----+ |
// +----+ +-| post- |___|PREG|---+ 'P'
// |MREG|------ | adder | +----+
// +----+ \-------/
match postAddMux match postAddMux
if postAdd if postAdd
if ffP if ffP
@ -333,6 +417,11 @@ code sigC
sigC = port(postAddMux, postAddMuxAB == \A ? \B : \A); sigC = port(postAddMux, postAddMuxAB == \A ? \B : \A);
endcode endcode
// (11) If PREG present, match for a greater-than-or-equal $ge cell attached to
// the 'P' output where it is compared to a constant that is a power-of-2:
// e.g. `assign overflow = (PREG >= 2**40);`
// In this scenario, the pattern detector functionality of a DSP48E1 can
// to implement this function
match overflow match overflow
if ffP if ffP
if param(dsp, \USE_PATTERN_DETECT, Const("NO_PATDET")).decode_string() == "NO_PATDET" if param(dsp, \USE_PATTERN_DETECT, Const("NO_PATDET")).decode_string() == "NO_PATDET"

View file

@ -1,3 +1,25 @@
// This file describes the second of three pattern matcher setups that
// forms the `xilinx_dsp` pass described in xilinx_dsp.cc
// At a high level, it works as follows:
// (1) Starting from a DSP48E1 cell that (a) doesn't have a CREG already,
// and (b) uses the 'C' port
// (2) Match the driver of the 'C' input to a possible $dff cell (CREG)
// (attached to at most two $mux cells that implement clock-enable or
// reset functionality, using a subpattern discussed below)
// Notes:
// - Separating out CREG packing is necessary since there is no guarantee
// that the cell ordering corresponds to the "expected" case (i.e. the order
// in which they appear in the source) thus the possiblity existed that a
// register got packed as a CREG into a downstream DSP that should have
// otherwise been a PREG of an upstream DSP that had not been visited yet
// - The reason this is separated out from the xilinx_dsp.pmg file is
// for efficiency --- each *.pmg file creates a class of the same basename,
// which when constructed, creates a custom database tailored to the
// pattern(s) contained within. Since the pattern in this file must be
// executed after the pattern contained in xilinx_dsp.pmg, it is necessary
// to reconstruct this database. Separating the two patterns into
// independent files causes two smaller, more specific, databases.
pattern xilinx_dsp_packC pattern xilinx_dsp_packC
udata <std::function<SigSpec(const SigSpec&)>> unextend udata <std::function<SigSpec(const SigSpec&)>> unextend
@ -15,13 +37,15 @@ udata <SigBit> dffclock
udata <Cell*> dff dffcemux dffrstmux udata <Cell*> dff dffcemux dffrstmux
udata <bool> dffcepol dffrstpol udata <bool> dffcepol dffrstpol
// (1) Starting from a DSP48E1 cell that (a) doesn't have a CREG already,
// and (b) uses the 'C' port
match dsp match dsp
select dsp->type.in(\DSP48E1) select dsp->type.in(\DSP48E1)
select param(dsp, \CREG, 1).as_int() == 0 select param(dsp, \CREG, 1).as_int() == 0
select nusers(port(dsp, \C, SigSpec())) > 1 select nusers(port(dsp, \C, SigSpec())) > 1
endmatch endmatch
code argQ ffC ffCcemux ffCrstmux ffCcepol ffCrstpol sigC sigP clock code sigC sigP
unextend = [](const SigSpec &sig) { unextend = [](const SigSpec &sig) {
int i; int i;
for (i = GetSize(sig)-1; i > 0; i--) for (i = GetSize(sig)-1; i > 0; i--)
@ -47,7 +71,14 @@ code argQ ffC ffCcemux ffCrstmux ffCcepol ffCrstpol sigC sigP clock
} }
else else
sigP = P; sigP = P;
endcode
// (2) Match the driver of the 'C' input to a possible $dff cell (CREG)
// (attached to at most two $mux cells that implement clock-enable or
// reset functionality, using a subpattern discussed below)
code argQ ffC ffCcemux ffCrstmux ffCcepol ffCrstpol sigC clock
// TODO: Any downside to allowing this?
// If this DSP implements an accumulator, do not attempt to match
if (sigC == sigP) if (sigC == sigP)
reject; reject;