mirror of
https://github.com/YosysHQ/yosys
synced 2025-08-03 18:00:24 +00:00
Merge 62196cbc0a
into 262b00d5e5
This commit is contained in:
commit
91e0486e5c
3 changed files with 176 additions and 36 deletions
|
@ -195,20 +195,18 @@ module \$__XILINX_SHIFTX (A, B, Y);
|
||||||
else if (A_WIDTH < `MIN_MUX_INPUTS) begin
|
else if (A_WIDTH < `MIN_MUX_INPUTS) begin
|
||||||
wire _TECHMAP_FAIL_ = 1;
|
wire _TECHMAP_FAIL_ = 1;
|
||||||
end
|
end
|
||||||
else if (A_WIDTH == 2) begin
|
else if (A_WIDTH == 1) begin
|
||||||
MUXF7 fpga_hard_mux (.I0(A[0]), .I1(A[1]), .S(B[0]), .O(Y));
|
assign Y = A[0];
|
||||||
end
|
end
|
||||||
|
// Use one LUT3 instead of a MUXF7 because a MUXF7 gets its inputs from 2 LUT6.O6 anyway
|
||||||
|
else if (A_WIDTH == 2) begin
|
||||||
|
assign Y = B[0] ? A[1] : A[0];
|
||||||
|
end
|
||||||
|
// Use one LUT6 instead of 2 LUT3 + MUXF7 because a MUXF7 gets its inputs from 2 LUT6.O6 anyway
|
||||||
else if (A_WIDTH <= 4) begin
|
else if (A_WIDTH <= 4) begin
|
||||||
wire [4-1:0] Ax;
|
wire [4-1:0] Ax = {{(4-A_WIDTH){1'bx}}, A};
|
||||||
if (A_WIDTH == 4)
|
assign Y = B[1] ? (B[0] ? Ax[3] : Ax[2])
|
||||||
assign Ax = A;
|
: (B[0] ? Ax[1] : Ax[0]);
|
||||||
else
|
|
||||||
// Rather than extend with 1'bx which gets flattened to 1'b0
|
|
||||||
// causing the "don't care" status to get lost, extend with
|
|
||||||
// the same driver of F7B.I0 so that we can optimise F7B away
|
|
||||||
// later
|
|
||||||
assign Ax = {A[1], A};
|
|
||||||
\$__XILINX_MUXF78 fpga_hard_mux (.I0(Ax[0]), .I1(Ax[2]), .I2(Ax[1]), .I3(Ax[3]), .S0(B[1]), .S1(B[0]), .O(Y));
|
|
||||||
end
|
end
|
||||||
// Note that the following decompositions are 'backwards' in that
|
// Note that the following decompositions are 'backwards' in that
|
||||||
// the LSBs are placed on the hard resources, and the soft resources
|
// the LSBs are placed on the hard resources, and the soft resources
|
||||||
|
@ -232,15 +230,46 @@ module \$__XILINX_SHIFTX (A, B, Y);
|
||||||
// but that the 'backwards' mapping (left) is more delay efficient
|
// but that the 'backwards' mapping (left) is more delay efficient
|
||||||
// since smaller LUTs are faster than wider ones.
|
// since smaller LUTs are faster than wider ones.
|
||||||
else if (A_WIDTH <= 8) begin
|
else if (A_WIDTH <= 8) begin
|
||||||
wire [8-1:0] Ax = {{{8-A_WIDTH}{1'bx}}, A};
|
wire [8-1:0] Ax = {{(8-A_WIDTH){1'bx}}, A};
|
||||||
wire T0 = B[2] ? Ax[4] : Ax[0];
|
// For 5-8 inputs, there are 2 possible implementations:
|
||||||
wire T1 = B[2] ? Ax[5] : Ax[1];
|
// - Using 4 LUT3 + 2 MUXF7 + MUXF8
|
||||||
wire T2 = B[2] ? Ax[6] : Ax[2];
|
// The MUXF7 inputs come from LUT6.O6 outputs so at best this would mean I5=1, inputs I4-I3-I2 for the MUX, leaving only I1-I0 available for other logic
|
||||||
wire T3 = B[2] ? Ax[7] : Ax[3];
|
// This means only 4 other LUT2 operations can be mapped to the slice (or larger ones assuming input sharing)
|
||||||
\$__XILINX_MUXF78 fpga_hard_mux (.I0(T0), .I1(T2), .I2(T1), .I3(T3), .S0(B[1]), .S1(B[0]), .O(Y));
|
// - Using 2 LUT6 + MUXF7
|
||||||
|
// This leaves 2 LUT6 + 2 inputs Cx and Dx available for other logic
|
||||||
|
// The solution used here is 2 LUT6 + MUXF7 for the following reasons:
|
||||||
|
// - Area report is much closer to what a user would expect
|
||||||
|
// - The rest of the slice is probably easier to use by place and route tools
|
||||||
|
// - Delay should be rather similar
|
||||||
|
wire T0 = B[1] ? (B[2] ? Ax[6] : Ax[2])
|
||||||
|
: (B[2] ? Ax[4] : Ax[0]);
|
||||||
|
wire T1 = B[1] ? (B[2] ? Ax[7] : Ax[3])
|
||||||
|
: (B[2] ? Ax[5] : Ax[1]);
|
||||||
|
MUXF7 fpga_hard_mux (.I0(T0), .I1(T1), .S(B[0]), .O(Y));
|
||||||
|
end
|
||||||
|
else if (A_WIDTH <= 12) begin
|
||||||
|
wire [12-1:0] Ax = {{(12-A_WIDTH){1'bx}}, A};
|
||||||
|
// For 9-12 inputs, only 3 LUT6 are needed
|
||||||
|
// Note that an explicit user objective of optimization for delay might make the mux16 below preferable
|
||||||
|
// (not a binary decision though: the overall design would be larger, which is not good for wire delay)
|
||||||
|
wire T0 = B[1] ? (B[0] ? Ax[ 3] : Ax[ 2])
|
||||||
|
: (B[0] ? Ax[ 1] : Ax[ 0]);
|
||||||
|
wire T1 = B[1] ? (B[0] ? Ax[ 7] : Ax[ 6])
|
||||||
|
: (B[0] ? Ax[ 5] : Ax[ 4]);
|
||||||
|
wire T2 = B[1] ? (B[0] ? Ax[11] : Ax[10])
|
||||||
|
: (B[0] ? Ax[ 9] : Ax[ 8]);
|
||||||
|
// Set parameters _TECHMAP_* to indicate that I2===I3 so that the upper MUXF7 is bypassed (assuming this is well handled by pnr tools)
|
||||||
|
\$__XILINX_MUXF78 #(
|
||||||
|
._TECHMAP_BITS_CONNMAP_(2),
|
||||||
|
._TECHMAP_CONNMAP_I0_(2'd0),
|
||||||
|
._TECHMAP_CONNMAP_I1_(2'd1),
|
||||||
|
._TECHMAP_CONNMAP_I2_(2'd2),
|
||||||
|
._TECHMAP_CONNMAP_I3_(2'd2)
|
||||||
|
) fpga_hard_mux (.I0(T0), .I1(T1), .I2(T2), .I3(T2), .S0(B[2]), .S1(B[3]), .O(Y));
|
||||||
end
|
end
|
||||||
else if (A_WIDTH <= 16) begin
|
else if (A_WIDTH <= 16) begin
|
||||||
wire [16-1:0] Ax = {{{16-A_WIDTH}{1'bx}}, A};
|
// For 13-16 inputs, use the full slice with 'backwards' decomposition described above
|
||||||
|
wire [16-1:0] Ax = {{(16-A_WIDTH){1'bx}}, A};
|
||||||
wire T0 = B[2] ? B[3] ? Ax[12] : Ax[4]
|
wire T0 = B[2] ? B[3] ? Ax[12] : Ax[4]
|
||||||
: B[3] ? Ax[ 8] : Ax[0];
|
: B[3] ? Ax[ 8] : Ax[0];
|
||||||
wire T1 = B[2] ? B[3] ? Ax[13] : Ax[5]
|
wire T1 = B[2] ? B[3] ? Ax[13] : Ax[5]
|
||||||
|
@ -252,32 +281,36 @@ module \$__XILINX_SHIFTX (A, B, Y);
|
||||||
\$__XILINX_MUXF78 fpga_hard_mux (.I0(T0), .I1(T2), .I2(T1), .I3(T3), .S0(B[1]), .S1(B[0]), .O(Y));
|
\$__XILINX_MUXF78 fpga_hard_mux (.I0(T0), .I1(T2), .I2(T1), .I3(T3), .S0(B[1]), .S1(B[0]), .O(Y));
|
||||||
end
|
end
|
||||||
else begin
|
else begin
|
||||||
|
// For more than 16 inputs, recursively split into sub-multiplexers of size at most 16
|
||||||
localparam num_mux16 = (A_WIDTH+15) / 16;
|
localparam num_mux16 = (A_WIDTH+15) / 16;
|
||||||
localparam clog2_num_mux16 = $clog2(num_mux16);
|
localparam clog2_num_mux16 = $clog2(num_mux16);
|
||||||
wire [num_mux16-1:0] T;
|
wire [num_mux16-1:0] T;
|
||||||
wire [num_mux16*16-1:0] Ax = {{(num_mux16*16-A_WIDTH){1'bx}}, A};
|
for (i = 0; i < num_mux16; i++) begin
|
||||||
for (i = 0; i < num_mux16; i++)
|
localparam local_num_in = (A_WIDTH-i*16 < 16) ? A_WIDTH-i*16 : 16;
|
||||||
|
localparam clog2_num_in = $clog2(local_num_in);
|
||||||
\$__XILINX_SHIFTX #(
|
\$__XILINX_SHIFTX #(
|
||||||
.A_SIGNED(A_SIGNED),
|
.A_SIGNED(A_SIGNED),
|
||||||
.B_SIGNED(B_SIGNED),
|
.B_SIGNED(B_SIGNED),
|
||||||
.A_WIDTH(16),
|
.A_WIDTH(local_num_in),
|
||||||
.B_WIDTH(4),
|
.B_WIDTH(clog2_num_in),
|
||||||
.Y_WIDTH(Y_WIDTH)
|
.Y_WIDTH(Y_WIDTH)
|
||||||
) fpga_mux (
|
) fpga_mux (
|
||||||
.A(Ax[i*16+:16]),
|
.A(A[i*16+:local_num_in]),
|
||||||
.B(B[3:0]),
|
.B(B[clog2_num_in-1:0]),
|
||||||
.Y(T[i])
|
.Y(T[i])
|
||||||
);
|
);
|
||||||
|
end
|
||||||
\$__XILINX_SHIFTX #(
|
\$__XILINX_SHIFTX #(
|
||||||
.A_SIGNED(A_SIGNED),
|
.A_SIGNED(A_SIGNED),
|
||||||
.B_SIGNED(B_SIGNED),
|
.B_SIGNED(B_SIGNED),
|
||||||
.A_WIDTH(num_mux16),
|
.A_WIDTH(num_mux16),
|
||||||
.B_WIDTH(clog2_num_mux16),
|
.B_WIDTH(clog2_num_mux16),
|
||||||
.Y_WIDTH(Y_WIDTH)
|
.Y_WIDTH(Y_WIDTH)
|
||||||
) _TECHMAP_REPLACE_ (
|
) _TECHMAP_REPLACE_ (
|
||||||
.A(T),
|
.A(T),
|
||||||
.B(B[B_WIDTH-1-:clog2_num_mux16]),
|
.B(B[4+:clog2_num_mux16]),
|
||||||
.Y(Y));
|
.Y(Y)
|
||||||
|
);
|
||||||
end
|
end
|
||||||
endgenerate
|
endgenerate
|
||||||
endmodule
|
endmodule
|
||||||
|
|
|
@ -51,10 +51,32 @@ module mux8 ( S, D, Y );
|
||||||
end
|
end
|
||||||
endmodule
|
endmodule
|
||||||
|
|
||||||
|
module mux12 (D, S, Y);
|
||||||
|
input [11:0] D;
|
||||||
|
input [3:0] S;
|
||||||
|
output Y;
|
||||||
|
|
||||||
|
wire[15:0] D16;
|
||||||
|
|
||||||
|
assign D16 = {4'bx, D};
|
||||||
|
assign Y = D16[S];
|
||||||
|
endmodule
|
||||||
|
|
||||||
module mux16 (D, S, Y);
|
module mux16 (D, S, Y);
|
||||||
input [15:0] D;
|
input [15:0] D;
|
||||||
input [3:0] S;
|
input [3:0] S;
|
||||||
output Y;
|
output Y;
|
||||||
|
|
||||||
assign Y = D[S];
|
assign Y = D[S];
|
||||||
endmodule
|
endmodule
|
||||||
|
|
||||||
|
module mux20 (D, S, Y);
|
||||||
|
input [19:0] D;
|
||||||
|
input [4:0] S;
|
||||||
|
output Y;
|
||||||
|
|
||||||
|
wire[31:0] D32;
|
||||||
|
|
||||||
|
assign D32 = {12'bx, D};
|
||||||
|
assign Y = D32[S];
|
||||||
|
endmodule
|
||||||
|
|
|
@ -1,6 +1,10 @@
|
||||||
|
|
||||||
read_verilog ../common/mux.v
|
read_verilog ../common/mux.v
|
||||||
design -save read
|
design -save read
|
||||||
|
|
||||||
|
|
||||||
|
# mux2
|
||||||
|
|
||||||
hierarchy -top mux2
|
hierarchy -top mux2
|
||||||
proc
|
proc
|
||||||
equiv_opt -assert -map +/xilinx/cells_sim.v synth_xilinx -noiopad # equivalency check
|
equiv_opt -assert -map +/xilinx/cells_sim.v synth_xilinx -noiopad # equivalency check
|
||||||
|
@ -8,9 +12,12 @@ design -load postopt # load the post-opt design (otherwise equiv_opt loads the p
|
||||||
cd mux2 # Constrain all select calls below inside the top module
|
cd mux2 # Constrain all select calls below inside the top module
|
||||||
select -assert-count 1 t:LUT3
|
select -assert-count 1 t:LUT3
|
||||||
|
|
||||||
|
# Ensure there are no other cells
|
||||||
select -assert-none t:LUT3 %% t:* %D
|
select -assert-none t:LUT3 %% t:* %D
|
||||||
|
|
||||||
|
|
||||||
|
# mux4
|
||||||
|
|
||||||
design -load read
|
design -load read
|
||||||
hierarchy -top mux4
|
hierarchy -top mux4
|
||||||
proc
|
proc
|
||||||
|
@ -19,9 +26,12 @@ design -load postopt # load the post-opt design (otherwise equiv_opt loads the p
|
||||||
cd mux4 # Constrain all select calls below inside the top module
|
cd mux4 # Constrain all select calls below inside the top module
|
||||||
select -assert-count 1 t:LUT6
|
select -assert-count 1 t:LUT6
|
||||||
|
|
||||||
|
# Ensure there are no other cells
|
||||||
select -assert-none t:LUT6 %% t:* %D
|
select -assert-none t:LUT6 %% t:* %D
|
||||||
|
|
||||||
|
|
||||||
|
# mux8 without widemux
|
||||||
|
|
||||||
design -load read
|
design -load read
|
||||||
hierarchy -top mux8
|
hierarchy -top mux8
|
||||||
proc
|
proc
|
||||||
|
@ -31,9 +41,43 @@ cd mux8 # Constrain all select calls below inside the top module
|
||||||
select -assert-count 1 t:LUT3
|
select -assert-count 1 t:LUT3
|
||||||
select -assert-count 2 t:LUT6
|
select -assert-count 2 t:LUT6
|
||||||
|
|
||||||
|
# Ensure there are no other cells
|
||||||
select -assert-none t:LUT3 t:LUT6 %% t:* %D
|
select -assert-none t:LUT3 t:LUT6 %% t:* %D
|
||||||
|
|
||||||
|
|
||||||
|
# mux8 with widemux 5
|
||||||
|
|
||||||
|
design -load read
|
||||||
|
hierarchy -top mux8
|
||||||
|
proc
|
||||||
|
equiv_opt -assert -map +/xilinx/cells_sim.v synth_xilinx -noiopad -widemux 5 # equivalency check
|
||||||
|
design -load postopt # load the post-opt design (otherwise equiv_opt loads the pre-opt design)
|
||||||
|
cd mux8 # Constrain all select calls below inside the top module
|
||||||
|
select -assert-count 2 t:LUT6
|
||||||
|
select -assert-count 1 t:MUXF7
|
||||||
|
|
||||||
|
# Ensure there are no other cells
|
||||||
|
select -assert-none t:LUT6 t:MUXF7 %% t:* %D
|
||||||
|
|
||||||
|
|
||||||
|
# mux12 with widemux 5
|
||||||
|
# There is no equivalence check because selection values 12 to 15 are unspecified
|
||||||
|
|
||||||
|
design -load read
|
||||||
|
hierarchy -top mux12
|
||||||
|
proc
|
||||||
|
synth_xilinx -noiopad -widemux 5
|
||||||
|
cd mux12 # Constrain all select calls below inside the top module
|
||||||
|
select -assert-count 3 t:LUT6
|
||||||
|
select -assert-max 2 t:MUXF7
|
||||||
|
select -assert-count 1 t:MUXF8
|
||||||
|
|
||||||
|
# Ensure there are no other cells
|
||||||
|
select -assert-none t:LUT6 t:MUXF7 t:MUXF8 %% t:* %D
|
||||||
|
|
||||||
|
|
||||||
|
# mux16 without widemux
|
||||||
|
|
||||||
design -load read
|
design -load read
|
||||||
hierarchy -top mux16
|
hierarchy -top mux16
|
||||||
proc
|
proc
|
||||||
|
@ -47,4 +91,45 @@ select -assert-max 7 t:LUT6
|
||||||
select -assert-max 2 t:MUXF7
|
select -assert-max 2 t:MUXF7
|
||||||
dump
|
dump
|
||||||
|
|
||||||
|
# Ensure there are no other cells
|
||||||
select -assert-none t:LUT6 t:LUT4 t:LUT3 t:MUXF7 %% t:* %D
|
select -assert-none t:LUT6 t:LUT4 t:LUT3 t:MUXF7 %% t:* %D
|
||||||
|
|
||||||
|
|
||||||
|
# mux16 with widemux 5
|
||||||
|
|
||||||
|
design -load read
|
||||||
|
hierarchy -top mux16
|
||||||
|
proc
|
||||||
|
equiv_opt -assert -map +/xilinx/cells_sim.v synth_xilinx -noiopad -widemux 5 # equivalency check
|
||||||
|
design -load postopt # load the post-opt design (otherwise equiv_opt loads the pre-opt design)
|
||||||
|
cd mux16 # Constrain all select calls below inside the top module
|
||||||
|
select -assert-count 4 t:LUT6
|
||||||
|
select -assert-count 2 t:MUXF7
|
||||||
|
select -assert-count 1 t:MUXF8
|
||||||
|
dump
|
||||||
|
|
||||||
|
# Ensure there are no other cells
|
||||||
|
select -assert-none t:LUT6 t:MUXF7 t:MUXF8 %% t:* %D
|
||||||
|
|
||||||
|
|
||||||
|
# mux20 with widemux 5
|
||||||
|
# Expect one mux16 (4 lut6 + 2 muxf7 + muxf8) + one mux4 (one lut6), then one mux2 (one lut3)
|
||||||
|
# These mapping results are achieved only with abc9 (without abc9, we get undesired additional muxf7/muxf8)
|
||||||
|
# There is no equivalence check because selection values 20 to 31 are unspecified
|
||||||
|
|
||||||
|
design -load read
|
||||||
|
hierarchy -top mux20
|
||||||
|
proc
|
||||||
|
scratchpad -set abc9.D 5000 # Set a period high enough so we get area-optimized result
|
||||||
|
synth_xilinx -noiopad -widemux 5 -abc9
|
||||||
|
cd mux20 # Constrain all select calls below inside the top module
|
||||||
|
select -assert-count 1 t:LUT3
|
||||||
|
select -assert-count 5 t:LUT6
|
||||||
|
select -assert-count 2 t:MUXF7
|
||||||
|
select -assert-count 1 t:MUXF8
|
||||||
|
dump
|
||||||
|
|
||||||
|
# Ensure there are no other cells
|
||||||
|
select -assert-none t:LUT3 t:LUT6 t:MUXF7 t:MUXF8 %% t:* %D
|
||||||
|
|
||||||
|
|
||||||
|
|
Loading…
Add table
Add a link
Reference in a new issue