mirror of
https://github.com/YosysHQ/yosys
synced 2025-06-14 18:06:16 +00:00
Add comments for xilinx_dsp
This commit is contained in:
parent
b47bb5c810
commit
cf82b38478
3 changed files with 134 additions and 6 deletions
|
@ -608,8 +608,13 @@ struct XilinxDspPass : public Pass {
|
||||||
extra_args(args, argidx, design);
|
extra_args(args, argidx, design);
|
||||||
|
|
||||||
for (auto module : design->selected_modules()) {
|
for (auto module : design->selected_modules()) {
|
||||||
|
// Experimental feature: pack $add/$sub cells with
|
||||||
|
// (* use_dsp48="simd" *) into DSP48E1's using its
|
||||||
|
// SIMD feature
|
||||||
xilinx_simd_pack(module, module->selected_cells());
|
xilinx_simd_pack(module, module->selected_cells());
|
||||||
|
|
||||||
|
// Match for all features ([ABDMP][12]?REG, pre-adder,
|
||||||
|
// post-adder, pattern detector, etc.) except for CREG
|
||||||
{
|
{
|
||||||
xilinx_dsp_pm pm(module, module->selected_cells());
|
xilinx_dsp_pm pm(module, module->selected_cells());
|
||||||
pm.run_xilinx_dsp_pack(xilinx_dsp_pack);
|
pm.run_xilinx_dsp_pack(xilinx_dsp_pack);
|
||||||
|
@ -618,14 +623,17 @@ struct XilinxDspPass : public Pass {
|
||||||
// is no guarantee that the cell ordering corresponds
|
// is no guarantee that the cell ordering corresponds
|
||||||
// to the "expected" case (i.e. the order in which
|
// to the "expected" case (i.e. the order in which
|
||||||
// they appear in the source) thus the possibility
|
// they appear in the source) thus the possibility
|
||||||
// existed that a register got packed as CREG into a
|
// existed that a register got packed as a CREG into a
|
||||||
// downstream DSP that should have otherwise been a
|
// downstream DSP that should have otherwise been a
|
||||||
// PREG of an upstream DSP that had not been pattern
|
// PREG of an upstream DSP that had not been visited
|
||||||
// matched yet
|
// yet
|
||||||
{
|
{
|
||||||
xilinx_dsp_CREG_pm pm(module, module->selected_cells());
|
xilinx_dsp_CREG_pm pm(module, module->selected_cells());
|
||||||
pm.run_xilinx_dsp_packC(xilinx_dsp_packC);
|
pm.run_xilinx_dsp_packC(xilinx_dsp_packC);
|
||||||
}
|
}
|
||||||
|
// Lastly, identify and utilise PCOUT -> PCIN,
|
||||||
|
// ACOUT -> ACIN, and BCOUT -> BCIN dedicated cascade
|
||||||
|
// chains
|
||||||
{
|
{
|
||||||
xilinx_dsp_cascade_pm pm(module, module->selected_cells());
|
xilinx_dsp_cascade_pm pm(module, module->selected_cells());
|
||||||
pm.run_xilinx_dsp_cascade();
|
pm.run_xilinx_dsp_cascade();
|
||||||
|
|
|
@ -1,3 +1,53 @@
|
||||||
|
// This file describes the main pattern matcher setup (of three total) that
|
||||||
|
// forms the `xilinx_dsp` pass described in xilinx_dsp.cc
|
||||||
|
// At a high level, it works as follows:
|
||||||
|
// ( 1) Starting from a DSP48E1 cell
|
||||||
|
// ( 2) Match the driver of the 'A' input to a possible $dff cell (ADREG)
|
||||||
|
// (attached to at most two $mux cells that implement clock-enable or
|
||||||
|
// reset functionality, using a subpattern discussed below)
|
||||||
|
// If ADREG matched, treat 'A' input as input of ADREG
|
||||||
|
// ( 3) Match the driver of the 'A' and 'D' inputs for a possible $add cell
|
||||||
|
// (pre-adder)
|
||||||
|
// ( 4) If pre-adder was present, find match 'A' input for A2REG
|
||||||
|
// If pre-adder was not present, move ADREG to A2REG
|
||||||
|
// If A2REG, then match 'A' input for A1REG
|
||||||
|
// ( 5) Match 'B' input for B2REG
|
||||||
|
// If B2REG, then match 'B' input for B1REG
|
||||||
|
// ( 6) Match 'D' input for DREG
|
||||||
|
// ( 7) Match 'P' output that exclusively drives an MREG
|
||||||
|
// ( 8) Match 'P' output that exclusively drives one of two inputs to an $add
|
||||||
|
// cell (post-adder).
|
||||||
|
// The other input to the adder is assumed to come in from the 'C' input
|
||||||
|
// (note: 'P' -> 'C' connections that exist for accumulators are
|
||||||
|
// recognised in xilinx_dsp.cc).
|
||||||
|
// ( 9) Match 'P' output that exclusively drives a PREG
|
||||||
|
// (10) If post-adder and PREG both present, match for a $mux cell driving
|
||||||
|
// the 'C' input, where one of the $mux's inputs is the PREG output.
|
||||||
|
// This indicates an accumulator situation, and one where a $mux exists
|
||||||
|
// to override the accumulated value:
|
||||||
|
// +--------------------------------+
|
||||||
|
// | ____ |
|
||||||
|
// +--| \ |
|
||||||
|
// |$mux|-+ |
|
||||||
|
// 'C' ---|____/ | |
|
||||||
|
// | /-------\ +----+ |
|
||||||
|
// +----+ +-| post- |___|PREG|---+ 'P'
|
||||||
|
// |MREG|------ | adder | +----+
|
||||||
|
// +----+ \-------/
|
||||||
|
// (11) If PREG present, match for a greater-than-or-equal $ge cell attached
|
||||||
|
// to the 'P' output where it is compared to a constant that is a
|
||||||
|
// power-of-2: e.g. `assign overflow = (PREG >= 2**40);`
|
||||||
|
// In this scenario, the pattern detector functionality of a DSP48E1 can
|
||||||
|
// be used to implement this function
|
||||||
|
// Notes:
|
||||||
|
// - The intention of this pattern matcher is for it to be compatible with
|
||||||
|
// DSP48E1 cells inferred from multiply operations by Yosys, as well as for
|
||||||
|
// user instantiations that may already contain the cells being packed...
|
||||||
|
// (though the latter is currently untested)
|
||||||
|
// - Since the $dff-with-clock-enable-or-reset-mux pattern is used for each
|
||||||
|
// *REG match, it has been factored out into two subpatterns: in_dffe
|
||||||
|
// and out_dffe, located at the bottom of this file
|
||||||
|
|
||||||
pattern xilinx_dsp_pack
|
pattern xilinx_dsp_pack
|
||||||
|
|
||||||
state <SigBit> clock
|
state <SigBit> clock
|
||||||
|
@ -5,12 +55,11 @@ state <SigSpec> sigA sigB sigC sigD sigM sigP
|
||||||
state <IdString> postAddAB postAddMuxAB
|
state <IdString> postAddAB postAddMuxAB
|
||||||
state <bool> ffA1cepol ffA2cepol ffADcepol ffB1cepol ffB2cepol ffDcepol ffMcepol ffPcepol
|
state <bool> ffA1cepol ffA2cepol ffADcepol ffB1cepol ffB2cepol ffDcepol ffMcepol ffPcepol
|
||||||
state <bool> ffArstpol ffADrstpol ffBrstpol ffDrstpol ffMrstpol ffPrstpol
|
state <bool> ffArstpol ffADrstpol ffBrstpol ffDrstpol ffMrstpol ffPrstpol
|
||||||
|
|
||||||
state <Cell*> ffAD ffADcemux ffADrstmux ffA1 ffA1cemux ffA1rstmux ffA2 ffA2cemux ffA2rstmux
|
state <Cell*> ffAD ffADcemux ffADrstmux ffA1 ffA1cemux ffA1rstmux ffA2 ffA2cemux ffA2rstmux
|
||||||
state <Cell*> ffB1 ffB1cemux ffB1rstmux ffB2 ffB2cemux ffB2rstmux
|
state <Cell*> ffB1 ffB1cemux ffB1rstmux ffB2 ffB2cemux ffB2rstmux
|
||||||
state <Cell*> ffD ffDcemux ffDrstmux ffM ffMcemux ffMrstmux ffP ffPcemux ffPrstmux
|
state <Cell*> ffD ffDcemux ffDrstmux ffM ffMcemux ffMrstmux ffP ffPcemux ffPrstmux
|
||||||
|
|
||||||
// subpattern
|
// Variables used for subpatterns
|
||||||
state <SigSpec> argQ argD
|
state <SigSpec> argQ argD
|
||||||
state <bool> ffcepol ffrstpol
|
state <bool> ffcepol ffrstpol
|
||||||
state <int> ffoffset
|
state <int> ffoffset
|
||||||
|
@ -19,6 +68,7 @@ udata <SigBit> dffclock
|
||||||
udata <Cell*> dff dffcemux dffrstmux
|
udata <Cell*> dff dffcemux dffrstmux
|
||||||
udata <bool> dffcepol dffrstpol
|
udata <bool> dffcepol dffrstpol
|
||||||
|
|
||||||
|
// (1) Starting from a DSP48E1 cell
|
||||||
match dsp
|
match dsp
|
||||||
select dsp->type.in(\DSP48E1)
|
select dsp->type.in(\DSP48E1)
|
||||||
endmatch
|
endmatch
|
||||||
|
@ -53,6 +103,7 @@ code sigA sigB sigC sigD sigM clock
|
||||||
}
|
}
|
||||||
else
|
else
|
||||||
sigM = P;
|
sigM = P;
|
||||||
|
// TODO: Check if necessary
|
||||||
// This sigM could have no users if downstream $add
|
// This sigM could have no users if downstream $add
|
||||||
// is narrower than $mul result, for example
|
// is narrower than $mul result, for example
|
||||||
if (sigM.empty())
|
if (sigM.empty())
|
||||||
|
@ -61,6 +112,10 @@ code sigA sigB sigC sigD sigM clock
|
||||||
clock = port(dsp, \CLK, SigBit());
|
clock = port(dsp, \CLK, SigBit());
|
||||||
endcode
|
endcode
|
||||||
|
|
||||||
|
// (2) Match the driver of the 'A' input to a possible $dff cell (ADREG)
|
||||||
|
// (attached to at most two $mux cells that implement clock-enable or
|
||||||
|
// reset functionality, using a subpattern discussed above)
|
||||||
|
// If matched, treat 'A' input as input of ADREG
|
||||||
code argQ ffAD ffADcemux ffADrstmux ffADcepol ffADrstpol sigA clock
|
code argQ ffAD ffADcemux ffADrstmux ffADcepol ffADrstpol sigA clock
|
||||||
if (param(dsp, \ADREG).as_int() == 0) {
|
if (param(dsp, \ADREG).as_int() == 0) {
|
||||||
argQ = sigA;
|
argQ = sigA;
|
||||||
|
@ -81,6 +136,8 @@ code argQ ffAD ffADcemux ffADrstmux ffADcepol ffADrstpol sigA clock
|
||||||
}
|
}
|
||||||
endcode
|
endcode
|
||||||
|
|
||||||
|
// (3) Match the driver of the 'A' and 'D' inputs for a possible $add cell
|
||||||
|
// (pre-adder)
|
||||||
match preAdd
|
match preAdd
|
||||||
if sigD.empty() || sigD.is_fully_zero()
|
if sigD.empty() || sigD.is_fully_zero()
|
||||||
// Ensure that preAdder not already used
|
// Ensure that preAdder not already used
|
||||||
|
@ -103,6 +160,7 @@ match preAdd
|
||||||
endmatch
|
endmatch
|
||||||
|
|
||||||
code sigA sigD
|
code sigA sigD
|
||||||
|
// TODO: Check if this is necessary?
|
||||||
if (preAdd) {
|
if (preAdd) {
|
||||||
sigA = port(preAdd, \A);
|
sigA = port(preAdd, \A);
|
||||||
sigD = port(preAdd, \B);
|
sigD = port(preAdd, \B);
|
||||||
|
@ -111,6 +169,9 @@ code sigA sigD
|
||||||
}
|
}
|
||||||
endcode
|
endcode
|
||||||
|
|
||||||
|
// (4) If pre-adder was present, find match 'A' input for A2REG
|
||||||
|
// If pre-adder was not present, move ADREG to A2REG
|
||||||
|
// Then match 'A' input for A1REG
|
||||||
code argQ ffAD ffADcemux ffADrstmux ffADcepol ffADrstpol sigA clock ffA2 ffA2cemux ffA2rstmux ffA2cepol ffArstpol ffA1 ffA1cemux ffA1rstmux ffA1cepol
|
code argQ ffAD ffADcemux ffADrstmux ffADcepol ffADrstpol sigA clock ffA2 ffA2cemux ffA2rstmux ffA2cepol ffArstpol ffA1 ffA1cemux ffA1rstmux ffA1cepol
|
||||||
// Only search for ffA2 if there was a pre-adder
|
// Only search for ffA2 if there was a pre-adder
|
||||||
// (otherwise ffA2 would have been matched as ffAD)
|
// (otherwise ffA2 would have been matched as ffAD)
|
||||||
|
@ -173,6 +234,8 @@ ffA1_end: ;
|
||||||
}
|
}
|
||||||
endcode
|
endcode
|
||||||
|
|
||||||
|
// (5) Match 'B' input for B2REG
|
||||||
|
// If B2REG, then match 'B' input for B1REG
|
||||||
code argQ ffB2 ffB2cemux ffB2rstmux ffB2cepol ffBrstpol sigB clock ffB1 ffB1cemux ffB1rstmux ffB1cepol
|
code argQ ffB2 ffB2cemux ffB2rstmux ffB2cepol ffBrstpol sigB clock ffB1 ffB1cemux ffB1rstmux ffB1cepol
|
||||||
if (param(dsp, \BREG).as_int() == 0) {
|
if (param(dsp, \BREG).as_int() == 0) {
|
||||||
argQ = sigB;
|
argQ = sigB;
|
||||||
|
@ -222,6 +285,7 @@ ffB1_end: ;
|
||||||
}
|
}
|
||||||
endcode
|
endcode
|
||||||
|
|
||||||
|
// (6) Match 'D' input for DREG
|
||||||
code argQ ffD ffDcemux ffDrstmux ffDcepol ffDrstpol sigD clock
|
code argQ ffD ffDcemux ffDrstmux ffDcepol ffDrstpol sigD clock
|
||||||
if (param(dsp, \DREG).as_int() == 0) {
|
if (param(dsp, \DREG).as_int() == 0) {
|
||||||
argQ = sigD;
|
argQ = sigD;
|
||||||
|
@ -242,6 +306,7 @@ code argQ ffD ffDcemux ffDrstmux ffDcepol ffDrstpol sigD clock
|
||||||
}
|
}
|
||||||
endcode
|
endcode
|
||||||
|
|
||||||
|
// (7) Match 'P' output that exclusively drives an MREG
|
||||||
code argD ffM ffMcemux ffMrstmux ffMcepol ffMrstpol sigM sigP clock
|
code argD ffM ffMcemux ffMrstmux ffMcepol ffMrstpol sigM sigP clock
|
||||||
if (param(dsp, \MREG).as_int() == 0 && nusers(sigM) == 2) {
|
if (param(dsp, \MREG).as_int() == 0 && nusers(sigM) == 2) {
|
||||||
argD = sigM;
|
argD = sigM;
|
||||||
|
@ -263,6 +328,11 @@ code argD ffM ffMcemux ffMrstmux ffMcepol ffMrstpol sigM sigP clock
|
||||||
sigP = sigM;
|
sigP = sigM;
|
||||||
endcode
|
endcode
|
||||||
|
|
||||||
|
// (8) Match 'P' output that exclusively drives one of two inputs to an $add
|
||||||
|
// cell (post-adder).
|
||||||
|
// The other input to the adder is assumed to come in from the 'C' input
|
||||||
|
// (note: 'P' -> 'C' connections that exist for accumulators are
|
||||||
|
// recognised in xilinx_dsp.cc).
|
||||||
match postAdd
|
match postAdd
|
||||||
// Ensure that Z mux is not already used
|
// Ensure that Z mux is not already used
|
||||||
if port(dsp, \OPMODE, SigSpec()).extract(4,3).is_fully_zero()
|
if port(dsp, \OPMODE, SigSpec()).extract(4,3).is_fully_zero()
|
||||||
|
@ -291,6 +361,7 @@ code sigC sigP
|
||||||
}
|
}
|
||||||
endcode
|
endcode
|
||||||
|
|
||||||
|
// (9) Match 'P' output that exclusively drives a PREG
|
||||||
code argD ffP ffPcemux ffPrstmux ffPcepol ffPrstpol sigP clock
|
code argD ffP ffPcemux ffPrstmux ffPcepol ffPrstpol sigP clock
|
||||||
if (param(dsp, \PREG).as_int() == 0) {
|
if (param(dsp, \PREG).as_int() == 0) {
|
||||||
int users = 2;
|
int users = 2;
|
||||||
|
@ -316,6 +387,19 @@ code argD ffP ffPcemux ffPrstmux ffPcepol ffPrstpol sigP clock
|
||||||
}
|
}
|
||||||
endcode
|
endcode
|
||||||
|
|
||||||
|
// (10) If post-adder and PREG both present, match for a $mux cell driving
|
||||||
|
// the 'C' input, where one of the $mux's inputs is the PREG output.
|
||||||
|
// This indicates an accumulator situation, and one where a $mux exists
|
||||||
|
// to override the accumulated value:
|
||||||
|
// +--------------------------------+
|
||||||
|
// | ____ |
|
||||||
|
// +--| \ |
|
||||||
|
// |$mux|-+ |
|
||||||
|
// 'C' ---|____/ | |
|
||||||
|
// | /-------\ +----+ |
|
||||||
|
// +----+ +-| post- |___|PREG|---+ 'P'
|
||||||
|
// |MREG|------ | adder | +----+
|
||||||
|
// +----+ \-------/
|
||||||
match postAddMux
|
match postAddMux
|
||||||
if postAdd
|
if postAdd
|
||||||
if ffP
|
if ffP
|
||||||
|
@ -333,6 +417,11 @@ code sigC
|
||||||
sigC = port(postAddMux, postAddMuxAB == \A ? \B : \A);
|
sigC = port(postAddMux, postAddMuxAB == \A ? \B : \A);
|
||||||
endcode
|
endcode
|
||||||
|
|
||||||
|
// (11) If PREG present, match for a greater-than-or-equal $ge cell attached to
|
||||||
|
// the 'P' output where it is compared to a constant that is a power-of-2:
|
||||||
|
// e.g. `assign overflow = (PREG >= 2**40);`
|
||||||
|
// In this scenario, the pattern detector functionality of a DSP48E1 can
|
||||||
|
// be used to implement this function
|
||||||
match overflow
|
match overflow
|
||||||
if ffP
|
if ffP
|
||||||
if param(dsp, \USE_PATTERN_DETECT, Const("NO_PATDET")).decode_string() == "NO_PATDET"
|
if param(dsp, \USE_PATTERN_DETECT, Const("NO_PATDET")).decode_string() == "NO_PATDET"
|
||||||
|
|
|
@ -1,3 +1,25 @@
|
||||||
|
// This file describes the second of three pattern matcher setups that
|
||||||
|
// form the `xilinx_dsp` pass described in xilinx_dsp.cc
|
||||||
|
// At a high level, it works as follows:
|
||||||
|
// (1) Starting from a DSP48E1 cell that (a) doesn't have a CREG already,
|
||||||
|
// and (b) uses the 'C' port
|
||||||
|
// (2) Match the driver of the 'C' input to a possible $dff cell (CREG)
|
||||||
|
// (attached to at most two $mux cells that implement clock-enable or
|
||||||
|
// reset functionality, using a subpattern discussed below)
|
||||||
|
// Notes:
|
||||||
|
// - Separating out CREG packing is necessary since there is no guarantee
|
||||||
|
// that the cell ordering corresponds to the "expected" case (i.e. the order
|
||||||
|
// in which they appear in the source) thus the possibility existed that a
|
||||||
|
// register got packed as a CREG into a downstream DSP that should have
|
||||||
|
// otherwise been a PREG of an upstream DSP that had not been visited yet
|
||||||
|
// - The reason this is separated out from the xilinx_dsp.pmg file is
|
||||||
|
// for efficiency --- each *.pmg file creates a class of the same basename,
|
||||||
|
// which when constructed, creates a custom database tailored to the
|
||||||
|
// pattern(s) contained within. Since the pattern in this file must be
|
||||||
|
// executed after the pattern contained in xilinx_dsp.pmg, it is necessary
|
||||||
|
// to reconstruct this database. Separating the two patterns into
|
||||||
|
// independent files results in two smaller, more specific databases.
|
||||||
|
|
||||||
pattern xilinx_dsp_packC
|
pattern xilinx_dsp_packC
|
||||||
|
|
||||||
udata <std::function<SigSpec(const SigSpec&)>> unextend
|
udata <std::function<SigSpec(const SigSpec&)>> unextend
|
||||||
|
@ -15,13 +37,15 @@ udata <SigBit> dffclock
|
||||||
udata <Cell*> dff dffcemux dffrstmux
|
udata <Cell*> dff dffcemux dffrstmux
|
||||||
udata <bool> dffcepol dffrstpol
|
udata <bool> dffcepol dffrstpol
|
||||||
|
|
||||||
|
// (1) Starting from a DSP48E1 cell that (a) doesn't have a CREG already,
|
||||||
|
// and (b) uses the 'C' port
|
||||||
match dsp
|
match dsp
|
||||||
select dsp->type.in(\DSP48E1)
|
select dsp->type.in(\DSP48E1)
|
||||||
select param(dsp, \CREG, 1).as_int() == 0
|
select param(dsp, \CREG, 1).as_int() == 0
|
||||||
select nusers(port(dsp, \C, SigSpec())) > 1
|
select nusers(port(dsp, \C, SigSpec())) > 1
|
||||||
endmatch
|
endmatch
|
||||||
|
|
||||||
code argQ ffC ffCcemux ffCrstmux ffCcepol ffCrstpol sigC sigP clock
|
code sigC sigP
|
||||||
unextend = [](const SigSpec &sig) {
|
unextend = [](const SigSpec &sig) {
|
||||||
int i;
|
int i;
|
||||||
for (i = GetSize(sig)-1; i > 0; i--)
|
for (i = GetSize(sig)-1; i > 0; i--)
|
||||||
|
@ -47,7 +71,14 @@ code argQ ffC ffCcemux ffCrstmux ffCcepol ffCrstpol sigC sigP clock
|
||||||
}
|
}
|
||||||
else
|
else
|
||||||
sigP = P;
|
sigP = P;
|
||||||
|
endcode
|
||||||
|
|
||||||
|
// (2) Match the driver of the 'C' input to a possible $dff cell (CREG)
|
||||||
|
// (attached to at most two $mux cells that implement clock-enable or
|
||||||
|
// reset functionality, using a subpattern discussed below)
|
||||||
|
code argQ ffC ffCcemux ffCrstmux ffCcepol ffCrstpol sigC clock
|
||||||
|
// TODO: Any downside to allowing this?
|
||||||
|
// If this DSP implements an accumulator, do not attempt to match
|
||||||
if (sigC == sigP)
|
if (sigC == sigP)
|
||||||
reject;
|
reject;
|
||||||
|
|
||||||
|
|
Loading…
Add table
Add a link
Reference in a new issue