mirror of
				https://github.com/YosysHQ/yosys
				synced 2025-11-04 13:29:12 +00:00 
			
		
		
		
	Merge pull request #1438 from YosysHQ/eddie/xilinx_dsp_comments
Add notes and comments for xilinx_dsp
This commit is contained in:
		
						commit
						472b5d33a6
					
				
					 5 changed files with 365 additions and 73 deletions
				
			
		| 
						 | 
				
			
			@ -609,8 +609,13 @@ struct XilinxDspPass : public Pass {
 | 
			
		|||
		extra_args(args, argidx, design);
 | 
			
		||||
 | 
			
		||||
		for (auto module : design->selected_modules()) {
 | 
			
		||||
			// Experimental feature: pack $add/$sub cells with
 | 
			
		||||
			//   (* use_dsp48="simd" *) into DSP48E1's using its
 | 
			
		||||
			//   SIMD feature
 | 
			
		||||
			xilinx_simd_pack(module, module->selected_cells());
 | 
			
		||||
 | 
			
		||||
			// Match for all features ([ABDMP][12]?REG, pre-adder,
 | 
			
		||||
			// post-adder, pattern detector, etc.) except for CREG
 | 
			
		||||
			{
 | 
			
		||||
				xilinx_dsp_pm pm(module, module->selected_cells());
 | 
			
		||||
				pm.run_xilinx_dsp_pack(xilinx_dsp_pack);
 | 
			
		||||
| 
						 | 
				
			
			@ -619,14 +624,17 @@ struct XilinxDspPass : public Pass {
 | 
			
		|||
			//   is no guarantee that the cell ordering corresponds
 | 
			
		||||
			//   to the "expected" case (i.e. the order in which
 | 
			
		||||
			//   they appear in the source) thus the possibility
 | 
			
		||||
			//   existed that a register got packed as CREG into a
 | 
			
		||||
			//   existed that a register got packed as a CREG into a
 | 
			
		||||
			//   downstream DSP that should have otherwise been a
 | 
			
		||||
			//   PREG of an upstream DSP that had not been pattern
 | 
			
		||||
			//   matched yet
 | 
			
		||||
			//   PREG of an upstream DSP that had not been visited
 | 
			
		||||
			//   yet
 | 
			
		||||
			{
 | 
			
		||||
				xilinx_dsp_CREG_pm pm(module, module->selected_cells());
 | 
			
		||||
				pm.run_xilinx_dsp_packC(xilinx_dsp_packC);
 | 
			
		||||
			}
 | 
			
		||||
			// Lastly, identify and utilise PCOUT -> PCIN,
 | 
			
		||||
			//   ACOUT -> ACIN, and BCOUT-> BCIN dedicated cascade
 | 
			
		||||
			//   chains
 | 
			
		||||
			{
 | 
			
		||||
				xilinx_dsp_cascade_pm pm(module, module->selected_cells());
 | 
			
		||||
				pm.run_xilinx_dsp_cascade();
 | 
			
		||||
| 
						 | 
				
			
			
 | 
			
		|||
| 
						 | 
				
			
			@ -1,3 +1,57 @@
 | 
			
		|||
// This file describes the main pattern matcher setup (of three total) that
 | 
			
		||||
//   forms the `xilinx_dsp` pass described in xilinx_dsp.cc
 | 
			
		||||
// At a high level, it works as follows:
 | 
			
		||||
//   ( 1) Starting from a DSP48E1 cell
 | 
			
		||||
//   ( 2) Match the driver of the 'A' input to a possible $dff cell (ADREG)
 | 
			
		||||
//        (attached to at most two $mux cells that implement clock-enable or
 | 
			
		||||
//         reset functionality, using a subpattern discussed below)
 | 
			
		||||
//        If ADREG matched, treat 'A' input as input of ADREG
 | 
			
		||||
//   ( 3) Match the driver of the 'A' and 'D' inputs for a possible $add cell
 | 
			
		||||
//       (pre-adder)
 | 
			
		||||
//   ( 4) If pre-adder was present, find match 'A' input for A2REG
 | 
			
		||||
//        If pre-adder was not present, move ADREG to A2REG
 | 
			
		||||
//        If A2REG, then match 'A' input for A1REG
 | 
			
		||||
//   ( 5) Match 'B' input for B2REG
 | 
			
		||||
//        If B2REG, then match 'B' input for B1REG
 | 
			
		||||
//   ( 6) Match 'D' input for DREG
 | 
			
		||||
//   ( 7) Match 'P' output that exclusively drives an MREG
 | 
			
		||||
//   ( 8) Match 'P' output that exclusively drives one of two inputs to an $add
 | 
			
		||||
//        cell (post-adder).
 | 
			
		||||
//        The other input to the adder is assumed to come in from the 'C' input
 | 
			
		||||
//        (note: 'P' -> 'C' connections that exist for accumulators are
 | 
			
		||||
//         recognised in xilinx_dsp.cc).
 | 
			
		||||
//   ( 9) Match 'P' output that exclusively drives a PREG
 | 
			
		||||
//   (10) If post-adder and PREG both present, match for a $mux cell driving
 | 
			
		||||
//        the 'C' input, where one of the $mux's inputs is the PREG output.
 | 
			
		||||
//        This indicates an accumulator situation, and one where a $mux exists
 | 
			
		||||
//        to override the accumulated value:
 | 
			
		||||
//             +--------------------------------+
 | 
			
		||||
//             |   ____                         |
 | 
			
		||||
//             +--|    \                        |
 | 
			
		||||
//                |$mux|-+                      |
 | 
			
		||||
//         'C' ---|____/ |                      |
 | 
			
		||||
//                       | /-------\   +----+   |
 | 
			
		||||
//            +----+     +-| post- |___|PREG|---+ 'P'
 | 
			
		||||
//            |MREG|------ | adder |   +----+
 | 
			
		||||
//            +----+       \-------/
 | 
			
		||||
//   (11) If PREG present, match for a greater-than-or-equal $ge cell attached
 | 
			
		||||
//        to the 'P' output where it is compared to a constant that is a
 | 
			
		||||
//        power-of-2: e.g. `assign overflow = (PREG >= 2**40);`
 | 
			
		||||
//        In this scenario, the pattern detector functionality of a DSP48E1 can
 | 
			
		||||
//        be used to implement this function
 | 
			
		||||
// Notes:
 | 
			
		||||
//   - The intention of this pattern matcher is for it to be compatible with
 | 
			
		||||
//     DSP48E1 cells inferred from multiply operations by Yosys, as well as for
 | 
			
		||||
//     user instantiations that may already contain the cells being packed...
 | 
			
		||||
//     (though the latter is currently untested)
 | 
			
		||||
//   - Since the $dff-with-optional-clock-enable-or-reset-mux pattern is used
 | 
			
		||||
//     for each *REG match, it has been factored out into two subpatterns:
 | 
			
		||||
//     in_dffe and out_dffe located at the bottom of this file.
 | 
			
		||||
//   - Matching for pattern detector features is currently incomplete. For
 | 
			
		||||
//     example, matching for underflow as well as overflow detection is
 | 
			
		||||
//     possible, as would auto-reset, enabling saturated arithmetic, detecting
 | 
			
		||||
//     custom patterns, etc.
 | 
			
		||||
 | 
			
		||||
pattern xilinx_dsp_pack
 | 
			
		||||
 | 
			
		||||
state <SigBit> clock
 | 
			
		||||
| 
						 | 
				
			
			@ -5,12 +59,11 @@ state <SigSpec> sigA sigB sigC sigD sigM sigP
 | 
			
		|||
state <IdString> postAddAB postAddMuxAB
 | 
			
		||||
state <bool> ffA1cepol ffA2cepol ffADcepol ffB1cepol ffB2cepol ffDcepol ffMcepol ffPcepol
 | 
			
		||||
state <bool> ffArstpol ffADrstpol ffBrstpol ffDrstpol ffMrstpol ffPrstpol
 | 
			
		||||
 | 
			
		||||
state <Cell*> ffAD ffADcemux ffADrstmux ffA1 ffA1cemux ffA1rstmux ffA2 ffA2cemux ffA2rstmux
 | 
			
		||||
state <Cell*> ffB1 ffB1cemux ffB1rstmux ffB2 ffB2cemux ffB2rstmux
 | 
			
		||||
state <Cell*> ffD ffDcemux ffDrstmux ffM ffMcemux ffMrstmux ffP ffPcemux ffPrstmux
 | 
			
		||||
 | 
			
		||||
// subpattern
 | 
			
		||||
// Variables used for subpatterns
 | 
			
		||||
state <SigSpec> argQ argD
 | 
			
		||||
state <bool> ffcepol ffrstpol
 | 
			
		||||
state <int> ffoffset
 | 
			
		||||
| 
						 | 
				
			
			@ -19,6 +72,7 @@ udata <SigBit> dffclock
 | 
			
		|||
udata <Cell*> dff dffcemux dffrstmux
 | 
			
		||||
udata <bool> dffcepol dffrstpol
 | 
			
		||||
 | 
			
		||||
// (1) Starting from a DSP48E1 cell
 | 
			
		||||
match dsp
 | 
			
		||||
	select dsp->type.in(\DSP48E1)
 | 
			
		||||
endmatch
 | 
			
		||||
| 
						 | 
				
			
			@ -50,17 +104,21 @@ code sigA sigB sigC sigD sigM clock
 | 
			
		|||
			sigM.append(P[i]);
 | 
			
		||||
		}
 | 
			
		||||
		log_assert(nusers(P.extract_end(i)) <= 1);
 | 
			
		||||
		// This sigM could have no users if downstream sinks (e.g. $add) are
 | 
			
		||||
		//   narrower than $mul result, for example
 | 
			
		||||
		if (sigM.empty())
 | 
			
		||||
			reject;
 | 
			
		||||
	}
 | 
			
		||||
	else
 | 
			
		||||
		sigM = P;
 | 
			
		||||
	// This sigM could have no users if downstream $add
 | 
			
		||||
	//   is narrower than $mul result, for example
 | 
			
		||||
	if (sigM.empty())
 | 
			
		||||
		reject;
 | 
			
		||||
 | 
			
		||||
	clock = port(dsp, \CLK, SigBit());
 | 
			
		||||
endcode
 | 
			
		||||
 | 
			
		||||
// (2) Match the driver of the 'A' input to a possible $dff cell (ADREG)
 | 
			
		||||
//     (attached to at most two $mux cells that implement clock-enable or
 | 
			
		||||
//      reset functionality, using a subpattern discussed above)
 | 
			
		||||
//     If matched, treat 'A' input as input of ADREG
 | 
			
		||||
code argQ ffAD ffADcemux ffADrstmux ffADcepol ffADrstpol sigA clock
 | 
			
		||||
	if (param(dsp, \ADREG).as_int() == 0) {
 | 
			
		||||
		argQ = sigA;
 | 
			
		||||
| 
						 | 
				
			
			@ -81,6 +139,8 @@ code argQ ffAD ffADcemux ffADrstmux ffADcepol ffADrstpol sigA clock
 | 
			
		|||
	}
 | 
			
		||||
endcode
 | 
			
		||||
 | 
			
		||||
// (3) Match the driver of the 'A' and 'D' inputs for a possible $add cell
 | 
			
		||||
//     (pre-adder)
 | 
			
		||||
match preAdd
 | 
			
		||||
	if sigD.empty() || sigD.is_fully_zero()
 | 
			
		||||
	// Ensure that preAdder not already used
 | 
			
		||||
| 
						 | 
				
			
			@ -106,11 +166,12 @@ code sigA sigD
 | 
			
		|||
	if (preAdd) {
 | 
			
		||||
		sigA = port(preAdd, \A);
 | 
			
		||||
		sigD = port(preAdd, \B);
 | 
			
		||||
		if (GetSize(sigA) < GetSize(sigD))
 | 
			
		||||
			std::swap(sigA, sigD);
 | 
			
		||||
	}
 | 
			
		||||
endcode
 | 
			
		||||
 | 
			
		||||
// (4) If pre-adder was present, find match 'A' input for A2REG
 | 
			
		||||
//     If pre-adder was not present, move ADREG to A2REG
 | 
			
		||||
//     Then match 'A' input for A1REG
 | 
			
		||||
code argQ ffAD ffADcemux ffADrstmux ffADcepol ffADrstpol sigA clock ffA2 ffA2cemux ffA2rstmux ffA2cepol ffArstpol ffA1 ffA1cemux ffA1rstmux ffA1cepol
 | 
			
		||||
	// Only search for ffA2 if there was a pre-adder
 | 
			
		||||
	//   (otherwise ffA2 would have been matched as ffAD)
 | 
			
		||||
| 
						 | 
				
			
			@ -173,6 +234,8 @@ ffA1_end:		;
 | 
			
		|||
	}
 | 
			
		||||
endcode
 | 
			
		||||
 | 
			
		||||
// (5) Match 'B' input for B2REG
 | 
			
		||||
//     If B2REG, then match 'B' input for B1REG
 | 
			
		||||
code argQ ffB2 ffB2cemux ffB2rstmux ffB2cepol ffBrstpol sigB clock ffB1 ffB1cemux ffB1rstmux ffB1cepol
 | 
			
		||||
	if (param(dsp, \BREG).as_int() == 0) {
 | 
			
		||||
		argQ = sigB;
 | 
			
		||||
| 
						 | 
				
			
			@ -222,6 +285,7 @@ ffB1_end:				;
 | 
			
		|||
	}
 | 
			
		||||
endcode
 | 
			
		||||
 | 
			
		||||
// (6) Match 'D' input for DREG
 | 
			
		||||
code argQ ffD ffDcemux ffDrstmux ffDcepol ffDrstpol sigD clock
 | 
			
		||||
	if (param(dsp, \DREG).as_int() == 0) {
 | 
			
		||||
		argQ = sigD;
 | 
			
		||||
| 
						 | 
				
			
			@ -242,6 +306,7 @@ code argQ ffD ffDcemux ffDrstmux ffDcepol ffDrstpol sigD clock
 | 
			
		|||
	}
 | 
			
		||||
endcode
 | 
			
		||||
 | 
			
		||||
// (7) Match 'P' output that exclusively drives an MREG
 | 
			
		||||
code argD ffM ffMcemux ffMrstmux ffMcepol ffMrstpol sigM sigP clock
 | 
			
		||||
	if (param(dsp, \MREG).as_int() == 0 && nusers(sigM) == 2) {
 | 
			
		||||
		argD = sigM;
 | 
			
		||||
| 
						 | 
				
			
			@ -263,6 +328,11 @@ code argD ffM ffMcemux ffMrstmux ffMcepol ffMrstpol sigM sigP clock
 | 
			
		|||
	sigP = sigM;
 | 
			
		||||
endcode
 | 
			
		||||
 | 
			
		||||
// (8) Match 'P' output that exclusively drives one of two inputs to an $add
 | 
			
		||||
//     cell (post-adder).
 | 
			
		||||
//     The other input to the adder is assumed to come in from the 'C' input
 | 
			
		||||
//     (note: 'P' -> 'C' connections that exist for accumulators are
 | 
			
		||||
//      recognised in xilinx_dsp.cc).
 | 
			
		||||
match postAdd
 | 
			
		||||
	// Ensure that Z mux is not already used
 | 
			
		||||
	if port(dsp, \OPMODE, SigSpec()).extract(4,3).is_fully_zero()
 | 
			
		||||
| 
						 | 
				
			
			@ -291,6 +361,7 @@ code sigC sigP
 | 
			
		|||
	}
 | 
			
		||||
endcode
 | 
			
		||||
 | 
			
		||||
// (9) Match 'P' output that exclusively drives a PREG
 | 
			
		||||
code argD ffP ffPcemux ffPrstmux ffPcepol ffPrstpol sigP clock
 | 
			
		||||
	if (param(dsp, \PREG).as_int() == 0) {
 | 
			
		||||
		int users = 2;
 | 
			
		||||
| 
						 | 
				
			
			@ -316,6 +387,19 @@ code argD ffP ffPcemux ffPrstmux ffPcepol ffPrstpol sigP clock
 | 
			
		|||
	}
 | 
			
		||||
endcode
 | 
			
		||||
 | 
			
		||||
// (10) If post-adder and PREG both present, match for a $mux cell driving
 | 
			
		||||
//      the 'C' input, where one of the $mux's inputs is the PREG output.
 | 
			
		||||
//      This indicates an accumulator situation, and one where a $mux exists
 | 
			
		||||
//      to override the accumulated value:
 | 
			
		||||
//           +--------------------------------+
 | 
			
		||||
//           |   ____                         |
 | 
			
		||||
//           +--|    \                        |
 | 
			
		||||
//              |$mux|-+                      |
 | 
			
		||||
//       'C' ---|____/ |                      |
 | 
			
		||||
//                     | /-------\   +----+   |
 | 
			
		||||
//          +----+     +-| post- |___|PREG|---+ 'P'
 | 
			
		||||
//          |MREG|------ | adder |   +----+
 | 
			
		||||
//          +----+       \-------/
 | 
			
		||||
match postAddMux
 | 
			
		||||
	if postAdd
 | 
			
		||||
	if ffP
 | 
			
		||||
| 
						 | 
				
			
			@ -333,6 +417,11 @@ code sigC
 | 
			
		|||
		sigC = port(postAddMux, postAddMuxAB == \A ? \B : \A);
 | 
			
		||||
endcode
 | 
			
		||||
 | 
			
		||||
// (11) If PREG present, match for a greater-than-or-equal $ge cell attached to
 | 
			
		||||
//      the 'P' output where it is compared to a constant that is a power-of-2:
 | 
			
		||||
//      e.g. `assign overflow = (PREG >= 2**40);`
 | 
			
		||||
//      In this scenario, the pattern detector functionality of a DSP48E1 can
 | 
			
		||||
//      be used to implement this function
 | 
			
		||||
match overflow
 | 
			
		||||
	if ffP
 | 
			
		||||
	if param(dsp, \USE_PATTERN_DETECT, Const("NO_PATDET")).decode_string() == "NO_PATDET"
 | 
			
		||||
| 
						 | 
				
			
			@ -351,22 +440,45 @@ endcode
 | 
			
		|||
 | 
			
		||||
// #######################
 | 
			
		||||
 | 
			
		||||
// Subpattern for matching against input registers, based on knowledge of the
 | 
			
		||||
//   'Q' input. Typically, identifying registers with clock-enable and reset
 | 
			
		||||
//   capability would be a task handled by other Yosys passes such as
 | 
			
		||||
//   dff2dffe, but since DSP inference happens much before this, these patterns
 | 
			
		||||
//   have to be manually identified.
 | 
			
		||||
// At a high level:
 | 
			
		||||
//   (1) Starting from a $dff cell that (partially or fully) drives the given
 | 
			
		||||
//       'Q' argument
 | 
			
		||||
//   (2) Match for a $mux cell implementing synchronous reset semantics ---
 | 
			
		||||
//       one that exclusively drives the 'D' input of the $dff, with one of its
 | 
			
		||||
//       $mux inputs being fully zero
 | 
			
		||||
//   (3) Match for a $mux cell implementing clock enable semantics --- one that
 | 
			
		||||
//       exclusively drives the 'D' input of the $dff (or the other input of
 | 
			
		||||
//       the reset $mux) and where one of this $mux's inputs is connected to
 | 
			
		||||
//       the 'Q' output of the $dff
 | 
			
		||||
subpattern in_dffe
 | 
			
		||||
arg argD argQ clock
 | 
			
		||||
 | 
			
		||||
code
 | 
			
		||||
	dff = nullptr;
 | 
			
		||||
	for (auto c : argQ.chunks()) {
 | 
			
		||||
	for (const auto &c : argQ.chunks()) {
 | 
			
		||||
		// Abandon matches when 'Q' is a constant
 | 
			
		||||
		if (!c.wire)
 | 
			
		||||
			reject;
 | 
			
		||||
		// Abandon matches when 'Q' has the keep attribute set
 | 
			
		||||
		if (c.wire->get_bool_attribute(\keep))
 | 
			
		||||
			reject;
 | 
			
		||||
		Const init = c.wire->attributes.at(\init, State::Sx);
 | 
			
		||||
		if (!init.is_fully_undef() && !init.is_fully_zero())
 | 
			
		||||
		// Abandon matches when 'Q' has a non-zero init attribute set
 | 
			
		||||
		// (not supported by DSP48E1)
 | 
			
		||||
		Const init = c.wire->attributes.at(\init, Const());
 | 
			
		||||
		if (!init.empty())
 | 
			
		||||
			for (auto b : init.extract(c.offset, c.width))
 | 
			
		||||
				if (b != State::Sx && b != State::S0)
 | 
			
		||||
					reject;
 | 
			
		||||
	}
 | 
			
		||||
endcode
 | 
			
		||||
 | 
			
		||||
// (1) Starting from a $dff cell that (partially or fully) drives the given
 | 
			
		||||
//     'Q' argument
 | 
			
		||||
match ff
 | 
			
		||||
	select ff->type.in($dff)
 | 
			
		||||
	// DSP48E1 does not support clock inversion
 | 
			
		||||
| 
						 | 
				
			
			@ -379,14 +491,12 @@ match ff
 | 
			
		|||
	filter GetSize(port(ff, \Q)) >= offset + GetSize(argQ)
 | 
			
		||||
	filter port(ff, \Q).extract(offset, GetSize(argQ)) == argQ
 | 
			
		||||
 | 
			
		||||
	filter clock == SigBit() || port(ff, \CLK) == clock
 | 
			
		||||
 | 
			
		||||
	set ffoffset offset
 | 
			
		||||
endmatch
 | 
			
		||||
 | 
			
		||||
code argQ argD
 | 
			
		||||
{
 | 
			
		||||
	if (clock != SigBit() && port(ff, \CLK) != clock)
 | 
			
		||||
		reject;
 | 
			
		||||
 | 
			
		||||
	SigSpec Q = port(ff, \Q);
 | 
			
		||||
	dff = ff;
 | 
			
		||||
	dffclock = port(ff, \CLK);
 | 
			
		||||
| 
						 | 
				
			
			@ -398,9 +508,11 @@ code argQ argD
 | 
			
		|||
	//   has two (ff, ffrstmux) users
 | 
			
		||||
	if (nusers(dffD) > 2)
 | 
			
		||||
		argD = SigSpec();
 | 
			
		||||
}
 | 
			
		||||
endcode
 | 
			
		||||
 | 
			
		||||
// (2) Match for a $mux cell implementing synchronous reset semantics ---
 | 
			
		||||
//     one exclusively driving the 'D' input of the $dff, with one of the $mux
 | 
			
		||||
//     inputs being fully zero
 | 
			
		||||
match ffrstmux
 | 
			
		||||
	if !argD.empty()
 | 
			
		||||
	select ffrstmux->type.in($mux)
 | 
			
		||||
| 
						 | 
				
			
			@ -432,6 +544,10 @@ code argD
 | 
			
		|||
		dffrstmux = nullptr;
 | 
			
		||||
endcode
 | 
			
		||||
 | 
			
		||||
// (3) Match for a $mux cell implementing clock enable semantics --- one that
 | 
			
		||||
//     exclusively drives the 'D' input of the $dff (or the other input of
 | 
			
		||||
//     the reset $mux) and where one of this $mux's inputs is connected to
 | 
			
		||||
//     the 'Q' output of the $dff
 | 
			
		||||
match ffcemux
 | 
			
		||||
	if !argD.empty()
 | 
			
		||||
	select ffcemux->type.in($mux)
 | 
			
		||||
| 
						 | 
				
			
			@ -456,16 +572,32 @@ endcode
 | 
			
		|||
 | 
			
		||||
// #######################
 | 
			
		||||
 | 
			
		||||
// Subpattern for matching against output registers, based on knowledge of the
 | 
			
		||||
//   'D' input.
 | 
			
		||||
// At a high level:
 | 
			
		||||
//   (1) Starting from an optional $mux cell that implements clock enable
 | 
			
		||||
//       semantics --- one where the given 'D' argument (partially or fully)
 | 
			
		||||
//       drives one of its two inputs
 | 
			
		||||
//   (2) Starting from, or continuing onto, another optional $mux cell that
 | 
			
		||||
//       implements synchronous reset semantics --- one where the given 'D'
 | 
			
		||||
//       argument (or the clock enable $mux output) drives one of its two inputs
 | 
			
		||||
//       and where the other input is fully zero
 | 
			
		||||
//   (3) Match for a $dff cell (whose 'D' input is the 'D' argument, or the
 | 
			
		||||
//       output of the previous clock enable or reset $mux cells)
 | 
			
		||||
subpattern out_dffe
 | 
			
		||||
arg argD argQ clock
 | 
			
		||||
 | 
			
		||||
code
 | 
			
		||||
	dff = nullptr;
 | 
			
		||||
	for (auto c : argD.chunks())
 | 
			
		||||
		// Abandon matches when 'D' has the keep attribute set
 | 
			
		||||
		if (c.wire->get_bool_attribute(\keep))
 | 
			
		||||
			reject;
 | 
			
		||||
endcode
 | 
			
		||||
 | 
			
		||||
// (1) Starting from an optional $mux cell that implements clock enable
 | 
			
		||||
//     semantics --- one where the given 'D' argument (partially or fully)
 | 
			
		||||
//     drives one of its two inputs
 | 
			
		||||
match ffcemux
 | 
			
		||||
	select ffcemux->type.in($mux)
 | 
			
		||||
	// ffcemux output must have two users: ffcemux and ff.D
 | 
			
		||||
| 
						 | 
				
			
			@ -504,6 +636,10 @@ code argD argQ
 | 
			
		|||
	}
 | 
			
		||||
endcode
 | 
			
		||||
 | 
			
		||||
// (2) Starting from, or continuing onto, another optional $mux cell that
 | 
			
		||||
//     implements synchronous reset semantics --- one where the given 'D'
 | 
			
		||||
//     argument (or the clock enable $mux output) drives one of its two inputs
 | 
			
		||||
//     and where the other input is fully zero
 | 
			
		||||
match ffrstmux
 | 
			
		||||
	select ffrstmux->type.in($mux)
 | 
			
		||||
	// ffrstmux output must have two users: ffrstmux and ff.D
 | 
			
		||||
| 
						 | 
				
			
			@ -542,6 +678,8 @@ code argD argQ
 | 
			
		|||
	}
 | 
			
		||||
endcode
 | 
			
		||||
 | 
			
		||||
// (3) Match for a $dff cell (whose 'D' input is the 'D' argument, or the
 | 
			
		||||
//     output of the previous clock enable or reset $mux cells)
 | 
			
		||||
match ff
 | 
			
		||||
	select ff->type.in($dff)
 | 
			
		||||
	// DSP48E1 does not support clock inversion
 | 
			
		||||
| 
						 | 
				
			
			@ -558,14 +696,12 @@ match ff
 | 
			
		|||
	// Check that FF.Q is connected to CE-mux
 | 
			
		||||
	filter !ffcemux || port(ff, \Q).extract(offset, GetSize(argQ)) == argQ
 | 
			
		||||
 | 
			
		||||
	filter clock == SigBit() || port(ff, \CLK) == clock
 | 
			
		||||
 | 
			
		||||
	set ffoffset offset
 | 
			
		||||
endmatch
 | 
			
		||||
 | 
			
		||||
code argQ
 | 
			
		||||
	if (ff) {
 | 
			
		||||
		if (clock != SigBit() && port(ff, \CLK) != clock)
 | 
			
		||||
			reject;
 | 
			
		||||
 | 
			
		||||
	SigSpec D = port(ff, \D);
 | 
			
		||||
	SigSpec Q = port(ff, \Q);
 | 
			
		||||
	if (!ffcemux) {
 | 
			
		||||
| 
						 | 
				
			
			@ -573,17 +709,17 @@ code argQ
 | 
			
		|||
		argQ.replace(D, Q);
 | 
			
		||||
	}
 | 
			
		||||
 | 
			
		||||
	// Abandon matches when 'Q' has a non-zero init attribute set
 | 
			
		||||
	// (not supported by DSP48E1)
 | 
			
		||||
	for (auto c : argQ.chunks()) {
 | 
			
		||||
			Const init = c.wire->attributes.at(\init, State::Sx);
 | 
			
		||||
			if (!init.is_fully_undef() && !init.is_fully_zero())
 | 
			
		||||
		Const init = c.wire->attributes.at(\init, Const());
 | 
			
		||||
		if (!init.empty())
 | 
			
		||||
			for (auto b : init.extract(c.offset, c.width))
 | 
			
		||||
				if (b != State::Sx && b != State::S0)
 | 
			
		||||
					reject;
 | 
			
		||||
	}
 | 
			
		||||
 | 
			
		||||
	dff = ff;
 | 
			
		||||
	dffQ = argQ;
 | 
			
		||||
	dffclock = port(ff, \CLK);
 | 
			
		||||
	}
 | 
			
		||||
	// No enable/reset mux possible without flop
 | 
			
		||||
	else if (dffcemux || dffrstmux)
 | 
			
		||||
		reject;
 | 
			
		||||
endcode
 | 
			
		||||
| 
						 | 
				
			
			
 | 
			
		|||
| 
						 | 
				
			
			@ -1,3 +1,26 @@
 | 
			
		|||
// This file describes the second of three pattern matcher setups that
 | 
			
		||||
//   forms the `xilinx_dsp` pass described in xilinx_dsp.cc
 | 
			
		||||
// At a high level, it works as follows:
 | 
			
		||||
//   (1) Starting from a DSP48E1 cell that (a) doesn't have a CREG already,
 | 
			
		||||
//       and (b) uses the 'C' port
 | 
			
		||||
//   (2) Match the driver of the 'C' input to a possible $dff cell (CREG)
 | 
			
		||||
//       (attached to at most two $mux cells that implement clock-enable or
 | 
			
		||||
//        reset functionality, using a subpattern discussed below)
 | 
			
		||||
// Notes:
 | 
			
		||||
//   - Running CREG packing after xilinx_dsp_pack is necessary since there is no
 | 
			
		||||
//     guarantee that the cell ordering corresponds to the "expected" case (i.e.
 | 
			
		||||
//     the order in which they appear in the source) thus the possibility existed
 | 
			
		||||
//     that a register got packed as a CREG into a downstream DSP that should
 | 
			
		||||
//     have otherwise been a PREG of an upstream DSP that had not been visited
 | 
			
		||||
//     yet
 | 
			
		||||
//   - The reason this is separated out from the xilinx_dsp.pmg file is
 | 
			
		||||
//     for efficiency --- each *.pmg file creates a class of the same basename,
 | 
			
		||||
//     which when constructed, creates a custom database tailored to the
 | 
			
		||||
//     pattern(s) contained within. Since the pattern in this file must be
 | 
			
		||||
//     executed after the pattern contained in xilinx_dsp.pmg, it is necessary
 | 
			
		||||
//     to reconstruct this database. Separating the two patterns into
 | 
			
		||||
//     independent files causes two smaller, more specific, databases.
 | 
			
		||||
 | 
			
		||||
pattern xilinx_dsp_packC
 | 
			
		||||
 | 
			
		||||
udata <std::function<SigSpec(const SigSpec&)>> unextend
 | 
			
		||||
| 
						 | 
				
			
			@ -6,7 +29,7 @@ state <SigSpec> sigC sigP
 | 
			
		|||
state <bool> ffCcepol ffCrstpol
 | 
			
		||||
state <Cell*> ffC ffCcemux ffCrstmux
 | 
			
		||||
 | 
			
		||||
// subpattern
 | 
			
		||||
// Variables used for subpatterns
 | 
			
		||||
state <SigSpec> argQ argD
 | 
			
		||||
state <bool> ffcepol ffrstpol
 | 
			
		||||
state <int> ffoffset
 | 
			
		||||
| 
						 | 
				
			
			@ -15,13 +38,15 @@ udata <SigBit> dffclock
 | 
			
		|||
udata <Cell*> dff dffcemux dffrstmux
 | 
			
		||||
udata <bool> dffcepol dffrstpol
 | 
			
		||||
 | 
			
		||||
// (1) Starting from a DSP48E1 cell that (a) doesn't have a CREG already,
 | 
			
		||||
//     and (b) uses the 'C' port
 | 
			
		||||
match dsp
 | 
			
		||||
	select dsp->type.in(\DSP48E1)
 | 
			
		||||
	select param(dsp, \CREG, 1).as_int() == 0
 | 
			
		||||
	select nusers(port(dsp, \C, SigSpec())) > 1
 | 
			
		||||
endmatch
 | 
			
		||||
 | 
			
		||||
code argQ ffC ffCcemux ffCrstmux ffCcepol ffCrstpol sigC sigP clock
 | 
			
		||||
code sigC sigP clock
 | 
			
		||||
	unextend = [](const SigSpec &sig) {
 | 
			
		||||
		int i;
 | 
			
		||||
		for (i = GetSize(sig)-1; i > 0; i--)
 | 
			
		||||
| 
						 | 
				
			
			@ -48,11 +73,13 @@ code argQ ffC ffCcemux ffCrstmux ffCcepol ffCrstpol sigC sigP clock
 | 
			
		|||
	else
 | 
			
		||||
		sigP = P;
 | 
			
		||||
 | 
			
		||||
	if (sigC == sigP)
 | 
			
		||||
		reject;
 | 
			
		||||
 | 
			
		||||
	clock = port(dsp, \CLK, SigBit());
 | 
			
		||||
endcode
 | 
			
		||||
 | 
			
		||||
// (2) Match the driver of the 'C' input to a possible $dff cell (CREG)
 | 
			
		||||
//     (attached to at most two $mux cells that implement clock-enable or
 | 
			
		||||
//      reset functionality, using the in_dffe subpattern)
 | 
			
		||||
code argQ ffC ffCcemux ffCrstmux ffCcepol ffCrstpol sigC clock
 | 
			
		||||
	argQ = sigC;
 | 
			
		||||
	subpattern(in_dffe);
 | 
			
		||||
	if (dff) {
 | 
			
		||||
| 
						 | 
				
			
			@ -77,22 +104,44 @@ endcode
 | 
			
		|||
 | 
			
		||||
// #######################
 | 
			
		||||
 | 
			
		||||
// Subpattern for matching against input registers, based on knowledge of the
 | 
			
		||||
//   'Q' input. Typically, identifying registers with clock-enable and reset
 | 
			
		||||
//   capability would be a task handled by other Yosys passes such as
 | 
			
		||||
//   dff2dffe, but since DSP inference happens much before this, these patterns
 | 
			
		||||
//   have to be manually identified.
 | 
			
		||||
// At a high level:
 | 
			
		||||
//   (1) Starting from a $dff cell that (partially or fully) drives the given
 | 
			
		||||
//       'Q' argument
 | 
			
		||||
//   (2) Match for a $mux cell implementing synchronous reset semantics ---
 | 
			
		||||
//       one that exclusively drives the 'D' input of the $dff, with one of its
 | 
			
		||||
//       $mux inputs being fully zero
 | 
			
		||||
//   (3) Match for a $mux cell implementing clock enable semantics --- one that
 | 
			
		||||
//       exclusively drives the 'D' input of the $dff (or the other input of
 | 
			
		||||
//       the reset $mux) and where one of this $mux's inputs is connected to
 | 
			
		||||
//       the 'Q' output of the $dff
 | 
			
		||||
subpattern in_dffe
 | 
			
		||||
arg argD argQ clock
 | 
			
		||||
 | 
			
		||||
code
 | 
			
		||||
	dff = nullptr;
 | 
			
		||||
	for (auto c : argQ.chunks()) {
 | 
			
		||||
	for (const auto &c : argQ.chunks()) {
 | 
			
		||||
		// Abandon matches when 'Q' is a constant
 | 
			
		||||
		if (!c.wire)
 | 
			
		||||
			reject;
 | 
			
		||||
		// Abandon matches when 'Q' has the keep attribute set
 | 
			
		||||
		if (c.wire->get_bool_attribute(\keep))
 | 
			
		||||
			reject;
 | 
			
		||||
		Const init = c.wire->attributes.at(\init, State::Sx);
 | 
			
		||||
		if (!init.is_fully_undef() && !init.is_fully_zero())
 | 
			
		||||
		// Abandon matches when 'Q' has a non-zero init attribute set
 | 
			
		||||
		// (not supported by DSP48E1)
 | 
			
		||||
		Const init = c.wire->attributes.at(\init, Const());
 | 
			
		||||
		for (auto b : init.extract(c.offset, c.width))
 | 
			
		||||
			if (b != State::Sx && b != State::S0)
 | 
			
		||||
				reject;
 | 
			
		||||
	}
 | 
			
		||||
endcode
 | 
			
		||||
 | 
			
		||||
// (1) Starting from a $dff cell that (partially or fully) drives the given
 | 
			
		||||
//     'Q' argument
 | 
			
		||||
match ff
 | 
			
		||||
	select ff->type.in($dff)
 | 
			
		||||
	// DSP48E1 does not support clock inversion
 | 
			
		||||
| 
						 | 
				
			
			@ -105,14 +154,12 @@ match ff
 | 
			
		|||
	filter GetSize(port(ff, \Q)) >= offset + GetSize(argQ)
 | 
			
		||||
	filter port(ff, \Q).extract(offset, GetSize(argQ)) == argQ
 | 
			
		||||
 | 
			
		||||
	filter clock == SigBit() || port(ff, \CLK) == clock
 | 
			
		||||
 | 
			
		||||
	set ffoffset offset
 | 
			
		||||
endmatch
 | 
			
		||||
 | 
			
		||||
code argQ argD
 | 
			
		||||
{
 | 
			
		||||
	if (clock != SigBit() && port(ff, \CLK) != clock)
 | 
			
		||||
		reject;
 | 
			
		||||
 | 
			
		||||
	SigSpec Q = port(ff, \Q);
 | 
			
		||||
	dff = ff;
 | 
			
		||||
	dffclock = port(ff, \CLK);
 | 
			
		||||
| 
						 | 
				
			
			@ -124,9 +171,11 @@ code argQ argD
 | 
			
		|||
	//   has two (ff, ffrstmux) users
 | 
			
		||||
	if (nusers(dffD) > 2)
 | 
			
		||||
		argD = SigSpec();
 | 
			
		||||
}
 | 
			
		||||
endcode
 | 
			
		||||
 | 
			
		||||
// (2) Match for a $mux cell implementing synchronous reset semantics ---
 | 
			
		||||
//     one exclusively driving the 'D' input of the $dff, with one of the $mux
 | 
			
		||||
//     inputs being fully zero
 | 
			
		||||
match ffrstmux
 | 
			
		||||
	if !argD.empty()
 | 
			
		||||
	select ffrstmux->type.in($mux)
 | 
			
		||||
| 
						 | 
				
			
			@ -158,6 +207,10 @@ code argD
 | 
			
		|||
		dffrstmux = nullptr;
 | 
			
		||||
endcode
 | 
			
		||||
 | 
			
		||||
// (3) Match for a $mux cell implementing clock enable semantics --- one that
 | 
			
		||||
//     exclusively drives the 'D' input of the $dff (or the other input of
 | 
			
		||||
//     the reset $mux) and where one of this $mux's inputs is connected to
 | 
			
		||||
//     the 'Q' output of the $dff
 | 
			
		||||
match ffcemux
 | 
			
		||||
	if !argD.empty()
 | 
			
		||||
	select ffcemux->type.in($mux)
 | 
			
		||||
| 
						 | 
				
			
			
 | 
			
		|||
| 
						 | 
				
			
			@ -1,3 +1,46 @@
 | 
			
		|||
// This file describes the third of three pattern matcher setups that
 | 
			
		||||
//   form the `xilinx_dsp` pass described in xilinx_dsp.cc
 | 
			
		||||
// At a high level, it works as follows:
 | 
			
		||||
//   (1) Starting from a DSP48E1 cell that (a) has the Z multiplexer
 | 
			
		||||
//       (controlled by OPMODE[6:4]) set to zero and (b) doesn't already
 | 
			
		||||
//       use the 'PCOUT' port
 | 
			
		||||
//   (2.1) Match another DSP48E1 cell that (a) does not have the CREG enabled,
 | 
			
		||||
//         (b) has its Z multiplexer output set to the 'C' port, which is
 | 
			
		||||
//         driven by the 'P' output of the previous DSP cell, and (c) has its
 | 
			
		||||
//         'PCIN' port unused
 | 
			
		||||
//   (2.2) Same as (2.1) but with the 'C' port driven by the 'P' output of the
 | 
			
		||||
//         previous DSP cell right-shifted by 17 bits
 | 
			
		||||
//   (3) For this subsequent DSP48E1 match (i.e. PCOUT -> PCIN cascade exists)
 | 
			
		||||
//       if (a) the previous DSP48E1 uses either the A2REG or A1REG, (b) this
 | 
			
		||||
//       DSP48 does not use A2REG nor A1REG, (c) this DSP48E1 does not already
 | 
			
		||||
//       have an ACOUT -> ACIN cascade, (d) the previous DSP does not already
 | 
			
		||||
//       use its ACOUT port, then examine if an ACOUT -> ACIN cascade
 | 
			
		||||
//       opportunity exists by matching for a $dff-with-optional-clock-enable-
 | 
			
		||||
//       or-reset and checking that the 'D' input of this register is the same
 | 
			
		||||
//       as the 'A' input of the previous DSP
 | 
			
		||||
//   (4) Same as (3) but for BCOUT -> BCIN cascade
 | 
			
		||||
//   (5) Recursively go to (2.1) until no more matches possible, keeping track
 | 
			
		||||
//       of the longest possible chain found
 | 
			
		||||
//   (6) The longest chain is then divided into chunks of no more than
 | 
			
		||||
//       MAX_DSP_CASCADE in length (to prevent long cascades that exceed the
 | 
			
		||||
//       height of a DSP column) with each DSP in each chunk being rewritten
 | 
			
		||||
//       to use [ABP]COUT -> [ABP]CIN cascading as appropriate
 | 
			
		||||
// Notes:
 | 
			
		||||
//   - Currently, [AB]COUT -> [AB]CIN cascades (3 or 4) are only considered
 | 
			
		||||
//     if a PCOUT -> PCIN cascade (2.1 or 2.2) is first identified; this need
 | 
			
		||||
//     not be the case --- [AB] cascades can exist independently of a P cascade
 | 
			
		||||
//     (though all three cascades must come from the same DSP). This situation
 | 
			
		||||
//     is not handled currently.
 | 
			
		||||
//   - In addition, [AB]COUT -> [AB]CIN cascades (3 or 4) are currently
 | 
			
		||||
//     conservative in that they examine the situation where (a) the previous
 | 
			
		||||
//     DSP has [AB]2REG or [AB]1REG enabled, (b) that the downstream DSP has no
 | 
			
		||||
//     registers enabled, and (c) that there exists only one additional register
 | 
			
		||||
//     between the upstream and downstream DSPs. This can certainly be relaxed
 | 
			
		||||
//     to identify situations ranging from (i) neither DSP uses any registers,
 | 
			
		||||
//     to (ii) upstream DSP has 2 registers, downstream DSP has 2 registers, and
 | 
			
		||||
//     there exists a further 2 registers between them. This remains a TODO
 | 
			
		||||
//     item.
 | 
			
		||||
 | 
			
		||||
pattern xilinx_dsp_cascade
 | 
			
		||||
 | 
			
		||||
udata <std::function<SigSpec(const SigSpec&)>> unextend
 | 
			
		||||
| 
						 | 
				
			
			@ -6,7 +49,7 @@ state <Cell*> next
 | 
			
		|||
state <SigSpec> clock
 | 
			
		||||
state <int> AREG BREG
 | 
			
		||||
 | 
			
		||||
// subpattern
 | 
			
		||||
// Variables used for subpatterns
 | 
			
		||||
state <SigSpec> argQ argD
 | 
			
		||||
state <bool> ffcepol ffrstpol
 | 
			
		||||
state <int> ffoffset
 | 
			
		||||
| 
						 | 
				
			
			@ -19,12 +62,19 @@ code
 | 
			
		|||
#define MAX_DSP_CASCADE 20
 | 
			
		||||
endcode
 | 
			
		||||
 | 
			
		||||
// (1) Starting from a DSP48E1 cell that (a) has the Z multiplexer
 | 
			
		||||
//     (controlled by OPMODE[6:4]) set to zero and (b) doesn't already
 | 
			
		||||
//     use the 'PCOUT' port
 | 
			
		||||
match first
 | 
			
		||||
	select first->type.in(\DSP48E1)
 | 
			
		||||
	select port(first, \OPMODE, Const(0, 7)).extract(4,3) == Const::from_string("000")
 | 
			
		||||
	select nusers(port(first, \PCOUT, SigSpec())) <= 1
 | 
			
		||||
endmatch
 | 
			
		||||
 | 
			
		||||
// (6) The longest chain is then divided into chunks of no more than
 | 
			
		||||
//     MAX_DSP_CASCADE in length (to prevent long cascades that exceed the
 | 
			
		||||
//     height of a DSP column) with each DSP in each chunk being rewritten
 | 
			
		||||
//     to use [ABP]COUT -> [ABP]CIN cascading as appropriate
 | 
			
		||||
code
 | 
			
		||||
	longest_chain.clear();
 | 
			
		||||
	chain.emplace_back(first, -1, -1, -1);
 | 
			
		||||
| 
						 | 
				
			
			@ -106,6 +156,10 @@ subpattern tail
 | 
			
		|||
arg first
 | 
			
		||||
arg next
 | 
			
		||||
 | 
			
		||||
// (2.1) Match another DSP48E1 cell that (a) does not have the CREG enabled,
 | 
			
		||||
//       (b) has its Z multiplexer output set to the 'C' port, which is
 | 
			
		||||
//       driven by the 'P' output of the previous DSP cell, and (c) has its
 | 
			
		||||
//       'PCIN' port unused
 | 
			
		||||
match nextP
 | 
			
		||||
	select nextP->type.in(\DSP48E1)
 | 
			
		||||
	select !param(nextP, \CREG, State::S1).as_bool()
 | 
			
		||||
| 
						 | 
				
			
			@ -116,6 +170,8 @@ match nextP
 | 
			
		|||
	semioptional
 | 
			
		||||
endmatch
 | 
			
		||||
 | 
			
		||||
// (2.2) Same as (2.1) but with the 'C' port driven by the 'P' output of the
 | 
			
		||||
//       previous DSP cell right-shifted by 17 bits
 | 
			
		||||
match nextP_shift17
 | 
			
		||||
	if !nextP
 | 
			
		||||
	select nextP_shift17->type.in(\DSP48E1)
 | 
			
		||||
| 
						 | 
				
			
			@ -145,6 +201,14 @@ code next
 | 
			
		|||
	}
 | 
			
		||||
endcode
 | 
			
		||||
 | 
			
		||||
// (3) For this subsequent DSP48E1 match (i.e. PCOUT -> PCIN cascade exists)
 | 
			
		||||
//     if (a) the previous DSP48E1 uses either the A2REG or A1REG, (b) this
 | 
			
		||||
//     DSP48 does not use A2REG nor A1REG, (c) this DSP48E1 does not already
 | 
			
		||||
//     have an ACOUT -> ACIN cascade, (d) the previous DSP does not already
 | 
			
		||||
//     use its ACOUT port, then examine if an ACOUT -> ACIN cascade
 | 
			
		||||
//     opportunity exists by matching for a $dff-with-optional-clock-enable-
 | 
			
		||||
//     or-reset and checking that the 'D' input of this register is the same
 | 
			
		||||
//     as the 'A' input of the previous DSP
 | 
			
		||||
code argQ clock AREG
 | 
			
		||||
	AREG = -1;
 | 
			
		||||
	if (next) {
 | 
			
		||||
| 
						 | 
				
			
			@ -152,7 +216,6 @@ code argQ clock AREG
 | 
			
		|||
		if (param(prev, \AREG, 2).as_int() > 0 &&
 | 
			
		||||
				param(next, \AREG, 2).as_int() > 0 &&
 | 
			
		||||
				param(next, \A_INPUT, Const("DIRECT")).decode_string() == "DIRECT" &&
 | 
			
		||||
				port(next, \ACIN, SigSpec()).is_fully_zero() &&
 | 
			
		||||
				nusers(port(prev, \ACOUT, SigSpec())) <= 1) {
 | 
			
		||||
			argQ = unextend(port(next, \A));
 | 
			
		||||
			clock = port(prev, \CLK);
 | 
			
		||||
| 
						 | 
				
			
			@ -174,6 +237,7 @@ reject_AREG:			;
 | 
			
		|||
	}
 | 
			
		||||
endcode
 | 
			
		||||
 | 
			
		||||
// (4) Same as (3) but for BCOUT -> BCIN cascade
 | 
			
		||||
code argQ clock BREG
 | 
			
		||||
	BREG = -1;
 | 
			
		||||
	if (next) {
 | 
			
		||||
| 
						 | 
				
			
			@ -203,13 +267,14 @@ reject_BREG:			;
 | 
			
		|||
	}
 | 
			
		||||
endcode
 | 
			
		||||
 | 
			
		||||
// (5) Recursively go to (2.1) until no more matches possible, recording the
 | 
			
		||||
//     longest possible chain
 | 
			
		||||
code
 | 
			
		||||
	if (next) {
 | 
			
		||||
		chain.emplace_back(next, nextP_shift17 ? 17 : nextP ? 0 : -1, AREG, BREG);
 | 
			
		||||
 | 
			
		||||
		SigSpec sigC = unextend(port(next, \C));
 | 
			
		||||
 | 
			
		||||
		// TODO: Cannot use 'reject' since semioptional
 | 
			
		||||
		if (nextP_shift17) {
 | 
			
		||||
			if (GetSize(sigC)+17 <= GetSize(port(std::get<0>(chain.back()), \P)) &&
 | 
			
		||||
					port(std::get<0>(chain.back()), \P).extract(17, GetSize(sigC)) != sigC)
 | 
			
		||||
| 
						 | 
				
			
			@ -232,22 +297,44 @@ endcode
 | 
			
		|||
 | 
			
		||||
// #######################
 | 
			
		||||
 | 
			
		||||
// Subpattern for matching against input registers, based on knowledge of the
 | 
			
		||||
//   'Q' input. Typically, identifying registers with clock-enable and reset
 | 
			
		||||
//   capability would be a task handled by other Yosys passes such as
 | 
			
		||||
//   dff2dffe, but since DSP inference happens much before this, these patterns
 | 
			
		||||
//   have to be manually identified.
 | 
			
		||||
// At a high level:
 | 
			
		||||
//   (1) Starting from a $dff cell that (partially or fully) drives the given
 | 
			
		||||
//       'Q' argument
 | 
			
		||||
//   (2) Match for a $mux cell implementing synchronous reset semantics ---
 | 
			
		||||
//       one that exclusively drives the 'D' input of the $dff, with one of its
 | 
			
		||||
//       $mux inputs being fully zero
 | 
			
		||||
//   (3) Match for a $mux cell implementing clock enable semantics --- one that
 | 
			
		||||
//       exclusively drives the 'D' input of the $dff (or the other input of
 | 
			
		||||
//       the reset $mux) and where one of this $mux's inputs is connected to
 | 
			
		||||
//       the 'Q' output of the $dff
 | 
			
		||||
subpattern in_dffe
 | 
			
		||||
arg argD argQ clock
 | 
			
		||||
 | 
			
		||||
code
 | 
			
		||||
	dff = nullptr;
 | 
			
		||||
	for (auto c : argQ.chunks()) {
 | 
			
		||||
	for (const auto &c : argQ.chunks()) {
 | 
			
		||||
		// Abandon matches when 'Q' is a constant
 | 
			
		||||
		if (!c.wire)
 | 
			
		||||
			reject;
 | 
			
		||||
		// Abandon matches when 'Q' has the keep attribute set
 | 
			
		||||
		if (c.wire->get_bool_attribute(\keep))
 | 
			
		||||
			reject;
 | 
			
		||||
		Const init = c.wire->attributes.at(\init, State::Sx);
 | 
			
		||||
		if (!init.is_fully_undef() && !init.is_fully_zero())
 | 
			
		||||
		// Abandon matches when 'Q' has a non-zero init attribute set
 | 
			
		||||
		// (not supported by DSP48E1)
 | 
			
		||||
		Const init = c.wire->attributes.at(\init, Const());
 | 
			
		||||
		for (auto b : init.extract(c.offset, c.width))
 | 
			
		||||
			if (b != State::Sx && b != State::S0)
 | 
			
		||||
				reject;
 | 
			
		||||
	}
 | 
			
		||||
endcode
 | 
			
		||||
 | 
			
		||||
// (1) Starting from a $dff cell that (partially or fully) drives the given
 | 
			
		||||
//     'Q' argument
 | 
			
		||||
match ff
 | 
			
		||||
	select ff->type.in($dff)
 | 
			
		||||
	// DSP48E1 does not support clock inversion
 | 
			
		||||
| 
						 | 
				
			
			@ -260,14 +347,12 @@ match ff
 | 
			
		|||
	filter GetSize(port(ff, \Q)) >= offset + GetSize(argQ)
 | 
			
		||||
	filter port(ff, \Q).extract(offset, GetSize(argQ)) == argQ
 | 
			
		||||
 | 
			
		||||
	filter clock == SigBit() || port(ff, \CLK) == clock
 | 
			
		||||
 | 
			
		||||
	set ffoffset offset
 | 
			
		||||
endmatch
 | 
			
		||||
 | 
			
		||||
code argQ argD
 | 
			
		||||
{
 | 
			
		||||
	if (clock != SigBit() && port(ff, \CLK) != clock)
 | 
			
		||||
		reject;
 | 
			
		||||
 | 
			
		||||
	SigSpec Q = port(ff, \Q);
 | 
			
		||||
	dff = ff;
 | 
			
		||||
	dffclock = port(ff, \CLK);
 | 
			
		||||
| 
						 | 
				
			
			@ -279,9 +364,11 @@ code argQ argD
 | 
			
		|||
	//   has two (ff, ffrstmux) users
 | 
			
		||||
	if (nusers(dffD) > 2)
 | 
			
		||||
		argD = SigSpec();
 | 
			
		||||
}
 | 
			
		||||
endcode
 | 
			
		||||
 | 
			
		||||
// (2) Match for a $mux cell implementing synchronous reset semantics ---
 | 
			
		||||
//     one that exclusively drives the 'D' input of the $dff, with one of the $mux
 | 
			
		||||
//     inputs being fully zero
 | 
			
		||||
match ffrstmux
 | 
			
		||||
	if !argD.empty()
 | 
			
		||||
	select ffrstmux->type.in($mux)
 | 
			
		||||
| 
						 | 
				
			
			@ -313,6 +400,10 @@ code argD
 | 
			
		|||
		dffrstmux = nullptr;
 | 
			
		||||
endcode
 | 
			
		||||
 | 
			
		||||
// (3) Match for a $mux cell implementing clock enable semantics --- one that
 | 
			
		||||
//     exclusively drives the 'D' input of the $dff (or the other input of
 | 
			
		||||
//     the reset $mux) and where one of this $mux's inputs is connected to
 | 
			
		||||
//     the 'Q' output of the $dff
 | 
			
		||||
match ffcemux
 | 
			
		||||
	if !argD.empty()
 | 
			
		||||
	select ffcemux->type.in($mux)
 | 
			
		||||
| 
						 | 
				
			
			
 | 
			
		|||
| 
						 | 
				
			
			@ -342,7 +342,11 @@ struct SynthXilinxPass : public ScriptPass
 | 
			
		|||
		if (check_label("map_dsp", "(skip if '-nodsp')")) {
 | 
			
		||||
			if (!nodsp || help_mode) {
 | 
			
		||||
				// NB: Xilinx multipliers are signed only
 | 
			
		||||
				run("techmap -map +/mul2dsp.v -map +/xilinx/dsp_map.v -D DSP_A_MAXWIDTH=25 -D DSP_A_MAXWIDTH_PARTIAL=18 -D DSP_B_MAXWIDTH=18 "
 | 
			
		||||
				run("techmap -map +/mul2dsp.v -map +/xilinx/dsp_map.v -D DSP_A_MAXWIDTH=25 "
 | 
			
		||||
					"-D DSP_A_MAXWIDTH_PARTIAL=18 -D DSP_B_MAXWIDTH=18 "    // Partial multipliers are intentionally
 | 
			
		||||
												// limited to 18x18 in order to take
 | 
			
		||||
												// advantage of the (PCOUT << 17) -> PCIN
 | 
			
		||||
												// dedicated cascade chain capability
 | 
			
		||||
					"-D DSP_A_MINWIDTH=2 -D DSP_B_MINWIDTH=2 " // Blocks Nx1 multipliers
 | 
			
		||||
					"-D DSP_Y_MINWIDTH=9 " // UG901 suggests small multiplies are those 4x4 and smaller
 | 
			
		||||
					"-D DSP_SIGNEDONLY=1 -D DSP_NAME=$__MUL25X18");
 | 
			
		||||
| 
						 | 
				
			
			
 | 
			
		|||
		Loading…
	
	Add table
		Add a link
		
	
		Reference in a new issue