3
0
Fork 0
mirror of https://github.com/YosysHQ/yosys synced 2025-08-03 01:40:23 +00:00
This commit is contained in:
Adrien Prost-Boucle 2025-07-31 13:22:05 +02:00 committed by GitHub
commit 91e0486e5c
No known key found for this signature in database
GPG key ID: B5690EEEBB952194
3 changed files with 176 additions and 36 deletions

View file

@ -195,20 +195,18 @@ module \$__XILINX_SHIFTX (A, B, Y);
else if (A_WIDTH < `MIN_MUX_INPUTS) begin
wire _TECHMAP_FAIL_ = 1;
end
else if (A_WIDTH == 2) begin
MUXF7 fpga_hard_mux (.I0(A[0]), .I1(A[1]), .S(B[0]), .O(Y));
else if (A_WIDTH == 1) begin
assign Y = A[0];
end
// Use one LUT3 instead of a MUXF7 because a MUXF7 gets its inputs from 2 LUT6.O6 anyway
else if (A_WIDTH == 2) begin
assign Y = B[0] ? A[1] : A[0];
end
// Use one LUT6 instead of 2 LUT3 + MUXF7 because a MUXF7 gets its inputs from 2 LUT6.O6 anyway
else if (A_WIDTH <= 4) begin
wire [4-1:0] Ax;
if (A_WIDTH == 4)
assign Ax = A;
else
// Rather than extend with 1'bx which gets flattened to 1'b0
// causing the "don't care" status to get lost, extend with
// the same driver of F7B.I0 so that we can optimise F7B away
// later
assign Ax = {A[1], A};
\$__XILINX_MUXF78 fpga_hard_mux (.I0(Ax[0]), .I1(Ax[2]), .I2(Ax[1]), .I3(Ax[3]), .S0(B[1]), .S1(B[0]), .O(Y));
wire [4-1:0] Ax = {{(4-A_WIDTH){1'bx}}, A};
assign Y = B[1] ? (B[0] ? Ax[3] : Ax[2])
: (B[0] ? Ax[1] : Ax[0]);
end
// Note that the following decompositions are 'backwards' in that
// the LSBs are placed on the hard resources, and the soft resources
@ -232,15 +230,46 @@ module \$__XILINX_SHIFTX (A, B, Y);
// but that the 'backwards' mapping (left) is more delay efficient
// since smaller LUTs are faster than wider ones.
else if (A_WIDTH <= 8) begin
wire [8-1:0] Ax = {{{8-A_WIDTH}{1'bx}}, A};
wire T0 = B[2] ? Ax[4] : Ax[0];
wire T1 = B[2] ? Ax[5] : Ax[1];
wire T2 = B[2] ? Ax[6] : Ax[2];
wire T3 = B[2] ? Ax[7] : Ax[3];
\$__XILINX_MUXF78 fpga_hard_mux (.I0(T0), .I1(T2), .I2(T1), .I3(T3), .S0(B[1]), .S1(B[0]), .O(Y));
wire [8-1:0] Ax = {{(8-A_WIDTH){1'bx}}, A};
// For 5-8 inputs, there are 2 possible implementations:
// - Using 4 LUT3 + 2 MUXF7 + MUXF8
// The MUXF7 inputs come from LUT6.O6 outputs so at best this would mean I5=1, inputs I4-I3-I2 for the MUX, leaving only I1-I0 available for other logic
// This means only 4 other LUT2 operations can be mapped to the slice (or larger ones assuming input sharing)
// - Using 2 LUT6 + MUXF7
// This leaves 2 LUT6 + 2 inputs Cx and Dx available for other logic
// The solution used here is 2 LUT6 + MUXF7 for the following reasons:
// - Area report is much closer to what a user would expect
// - The rest of the slice is probably easier to use by place and route tools
// - Delay should be rather similar
wire T0 = B[1] ? (B[2] ? Ax[6] : Ax[2])
: (B[2] ? Ax[4] : Ax[0]);
wire T1 = B[1] ? (B[2] ? Ax[7] : Ax[3])
: (B[2] ? Ax[5] : Ax[1]);
MUXF7 fpga_hard_mux (.I0(T0), .I1(T1), .S(B[0]), .O(Y));
end
else if (A_WIDTH <= 12) begin
wire [12-1:0] Ax = {{(12-A_WIDTH){1'bx}}, A};
// For 9-12 inputs, only 3 LUT6 are needed
// Note that an explicit user objective of optimization for delay might make the mux16 below preferable
// (not a binary decision though: the overall design would be larger, which is not good for wire delay)
wire T0 = B[1] ? (B[0] ? Ax[ 3] : Ax[ 2])
: (B[0] ? Ax[ 1] : Ax[ 0]);
wire T1 = B[1] ? (B[0] ? Ax[ 7] : Ax[ 6])
: (B[0] ? Ax[ 5] : Ax[ 4]);
wire T2 = B[1] ? (B[0] ? Ax[11] : Ax[10])
: (B[0] ? Ax[ 9] : Ax[ 8]);
// Set parameters _TECHMAP_* to indicate that I2===I3 so that the upper MUXF7 is bypassed (assuming this is well handled by pnr tools)
\$__XILINX_MUXF78 #(
._TECHMAP_BITS_CONNMAP_(2),
._TECHMAP_CONNMAP_I0_(2'd0),
._TECHMAP_CONNMAP_I1_(2'd1),
._TECHMAP_CONNMAP_I2_(2'd2),
._TECHMAP_CONNMAP_I3_(2'd2)
) fpga_hard_mux (.I0(T0), .I1(T1), .I2(T2), .I3(T2), .S0(B[2]), .S1(B[3]), .O(Y));
end
else if (A_WIDTH <= 16) begin
wire [16-1:0] Ax = {{{16-A_WIDTH}{1'bx}}, A};
// For 13-16 inputs, use the full slice with 'backwards' decomposition described above
wire [16-1:0] Ax = {{(16-A_WIDTH){1'bx}}, A};
wire T0 = B[2] ? B[3] ? Ax[12] : Ax[4]
: B[3] ? Ax[ 8] : Ax[0];
wire T1 = B[2] ? B[3] ? Ax[13] : Ax[5]
@ -252,32 +281,36 @@ module \$__XILINX_SHIFTX (A, B, Y);
\$__XILINX_MUXF78 fpga_hard_mux (.I0(T0), .I1(T2), .I2(T1), .I3(T3), .S0(B[1]), .S1(B[0]), .O(Y));
end
else begin
// For more than 16 inputs, recursively split into sub-multiplexers of size at most 16
localparam num_mux16 = (A_WIDTH+15) / 16;
localparam clog2_num_mux16 = $clog2(num_mux16);
wire [num_mux16-1:0] T;
wire [num_mux16*16-1:0] Ax = {{(num_mux16*16-A_WIDTH){1'bx}}, A};
for (i = 0; i < num_mux16; i++)
for (i = 0; i < num_mux16; i++) begin
localparam local_num_in = (A_WIDTH-i*16 < 16) ? A_WIDTH-i*16 : 16;
localparam clog2_num_in = $clog2(local_num_in);
\$__XILINX_SHIFTX #(
.A_SIGNED(A_SIGNED),
.B_SIGNED(B_SIGNED),
.A_WIDTH(16),
.B_WIDTH(4),
.A_WIDTH(local_num_in),
.B_WIDTH(clog2_num_in),
.Y_WIDTH(Y_WIDTH)
) fpga_mux (
.A(Ax[i*16+:16]),
.B(B[3:0]),
.A(A[i*16+:local_num_in]),
.B(B[clog2_num_in-1:0]),
.Y(T[i])
);
end
\$__XILINX_SHIFTX #(
.A_SIGNED(A_SIGNED),
.B_SIGNED(B_SIGNED),
.A_WIDTH(num_mux16),
.B_WIDTH(clog2_num_mux16),
.Y_WIDTH(Y_WIDTH)
.A_SIGNED(A_SIGNED),
.B_SIGNED(B_SIGNED),
.A_WIDTH(num_mux16),
.B_WIDTH(clog2_num_mux16),
.Y_WIDTH(Y_WIDTH)
) _TECHMAP_REPLACE_ (
.A(T),
.B(B[B_WIDTH-1-:clog2_num_mux16]),
.Y(Y));
.A(T),
.B(B[4+:clog2_num_mux16]),
.Y(Y)
);
end
endgenerate
endmodule

View file

@ -51,10 +51,32 @@ module mux8 ( S, D, Y );
end
endmodule
// 12-input, 1-bit multiplexer test design: Y is the bit of D selected by S.
// D is padded to 16 bits with 4'bx, so S values 12 to 15 select an x bit.
module mux12 (D, S, Y);
input [11:0] D;
input [3:0] S;
output Y;
// Widen D to 16 bits so that every 4-bit value of S indexes inside the vector
wire[15:0] D16;
assign D16 = {4'bx, D};
assign Y = D16[S];
endmodule
// 16-input, 1-bit multiplexer test design: Y is the bit of D selected by S.
// Every 4-bit value of S addresses a valid bit of D, so no padding is needed.
module mux16 (D, S, Y);
// Each port is declared exactly once; repeating a declaration redeclares the port
input [15:0] D;
input [3:0] S;
output Y;
assign Y = D[S];
endmodule
// 20-input, 1-bit multiplexer test design: Y is the bit of D selected by S.
// D is padded to 32 bits with 12'bx, so S values 20 to 31 select an x bit.
module mux20 (D, S, Y);
input [19:0] D;
input [4:0] S;
output Y;
// Widen D to 32 bits so that every 5-bit value of S indexes inside the vector
wire[31:0] D32;
assign D32 = {12'bx, D};
assign Y = D32[S];
endmodule

View file

@ -1,6 +1,10 @@
read_verilog ../common/mux.v
design -save read
# mux2
hierarchy -top mux2
proc
equiv_opt -assert -map +/xilinx/cells_sim.v synth_xilinx -noiopad # equivalency check
@ -8,9 +12,12 @@ design -load postopt # load the post-opt design (otherwise equiv_opt loads the p
cd mux2 # Constrain all select calls below inside the top module
select -assert-count 1 t:LUT3
# Ensure there are no other cells
select -assert-none t:LUT3 %% t:* %D
# mux4
design -load read
hierarchy -top mux4
proc
@ -19,9 +26,12 @@ design -load postopt # load the post-opt design (otherwise equiv_opt loads the p
cd mux4 # Constrain all select calls below inside the top module
select -assert-count 1 t:LUT6
# Ensure there are no other cells
select -assert-none t:LUT6 %% t:* %D
# mux8 without widemux
design -load read
hierarchy -top mux8
proc
@ -31,9 +41,43 @@ cd mux8 # Constrain all select calls below inside the top module
select -assert-count 1 t:LUT3
select -assert-count 2 t:LUT6
# Ensure there are no other cells
select -assert-none t:LUT3 t:LUT6 %% t:* %D
# mux8 with widemux 5
design -load read
hierarchy -top mux8
proc
equiv_opt -assert -map +/xilinx/cells_sim.v synth_xilinx -noiopad -widemux 5 # equivalency check
design -load postopt # load the post-opt design (otherwise equiv_opt loads the pre-opt design)
cd mux8 # Constrain all select calls below inside the top module
select -assert-count 2 t:LUT6
select -assert-count 1 t:MUXF7
# Ensure there are no other cells
select -assert-none t:LUT6 t:MUXF7 %% t:* %D
# mux12 with widemux 5
# There is no equivalence check because selection values 12 to 15 are unspecified
design -load read
hierarchy -top mux12
proc
synth_xilinx -noiopad -widemux 5
cd mux12 # Constrain all select calls below inside the top module
select -assert-count 3 t:LUT6
select -assert-max 2 t:MUXF7
select -assert-count 1 t:MUXF8
# Ensure there are no other cells
select -assert-none t:LUT6 t:MUXF7 t:MUXF8 %% t:* %D
# mux16 without widemux
design -load read
hierarchy -top mux16
proc
@ -47,4 +91,45 @@ select -assert-max 7 t:LUT6
select -assert-max 2 t:MUXF7
dump
# Ensure there are no other cells
select -assert-none t:LUT6 t:LUT4 t:LUT3 t:MUXF7 %% t:* %D
# mux16 with widemux 5
design -load read
hierarchy -top mux16
proc
equiv_opt -assert -map +/xilinx/cells_sim.v synth_xilinx -noiopad -widemux 5 # equivalency check
design -load postopt # load the post-opt design (otherwise equiv_opt loads the pre-opt design)
cd mux16 # Constrain all select calls below inside the top module
select -assert-count 4 t:LUT6
select -assert-count 2 t:MUXF7
select -assert-count 1 t:MUXF8
dump
# Ensure there are no other cells
select -assert-none t:LUT6 t:MUXF7 t:MUXF8 %% t:* %D
# mux20 with widemux 5
# Expect one mux16 (4 lut6 + 2 muxf7 + muxf8) + one mux4 (one lut6), then one mux2 (one lut3)
# These mapping results are achieved only with abc9 (without abc, we get undesired additional muxf7/muxf8)
# There is no equivalence check because selection values 20 to 31 are unspecified
design -load read
hierarchy -top mux20
proc
scratchpad -set abc9.D 5000 # Set a period high enough so we get area-optimized result
synth_xilinx -noiopad -widemux 5 -abc9
cd mux20 # Constrain all select calls below inside the top module
select -assert-count 1 t:LUT3
select -assert-count 5 t:LUT6
select -assert-count 2 t:MUXF7
select -assert-count 1 t:MUXF8
dump
# Ensure there are no other cells
select -assert-none t:LUT3 t:LUT6 t:MUXF7 t:MUXF8 %% t:* %D