From a580a7c82c288e4821f1a44829d6b2aa8badf323 Mon Sep 17 00:00:00 2001
From: "Emil J. Tywoniak" <emil@tywoniak.eu>
Date: Wed, 3 Apr 2024 20:37:54 +0200
Subject: [PATCH 1/3] docs: Document $macc

---
 .../yosys_internals/formats/cell_library.rst  | 48 ++++++++++++++++-
 techlibs/common/simlib.v                      | 51 +++++++++++++++++--
 2 files changed, 94 insertions(+), 5 deletions(-)

diff --git a/docs/source/yosys_internals/formats/cell_library.rst b/docs/source/yosys_internals/formats/cell_library.rst
index c80b07402..a4e5adfb7 100644
--- a/docs/source/yosys_internals/formats/cell_library.rst
+++ b/docs/source/yosys_internals/formats/cell_library.rst
@@ -619,6 +619,52 @@ Finite state machines
 
 Add a brief description of the ``$fsm`` cell type.
 
+Coarse arithmetics
+~~~~~~~~~~~~~~~~~~~~~
+
+The ``$macc`` cell type represents a multiply and accumulate block, for summing any number of negated and unnegated signals and arithmetic products of pairs of signals. Cell port A concatenates pairs of signals to be multiplied together. When the second signal in a pair is zero length, a constant 1 is used instead as the second factor. Cell port B concatenates 1-bit-wide signals to also be summed, such as "carry in" in adders. 
+
+The cell's ``CONFIG`` parameter determines the layout of cell port ``A``.
+In the terms used for this cell, there's mixed meanings for the term "port". To disambiguate:
+A cell port is for example the A input (it is constructed in C++ as ``cell->setPort(ID::A, ...))``
+Multiplier ports are pairs of multiplier inputs ("factors").
+If the second signal in such a pair is zero length, no multiplication is necessary, and the first signal is just added to the sum.
+
+In this pseudocode, ``u(foo)`` means an unsigned int that's foo bits long.
+The CONFIG parameter carries the following information:
+.. code-block::
+	:force:
+	struct CONFIG {
+		u4 num_bits;
+		struct port_field {
+			bool is_signed;
+			bool is_subtract;
+			u(num_bits) factor1_len;
+			u(num_bits) factor2_len;
+		}[num_ports];
+	};
+
+The A cell port carries the following information:
+.. code-block::
+	:force:
+	struct A {
+		u(CONFIG.port_field[0].factor1_len) port0factor1;
+		u(CONFIG.port_field[0].factor2_len) port0factor2;
+		u(CONFIG.port_field[1].factor1_len) port1factor1;
+		u(CONFIG.port_field[1].factor2_len) port1factor2;
+		...
+	};
+
+No factor1 may have a zero length.
+A factor2 having a zero length implies factor2 is replaced with a constant 1.
+
+Additionally, B is an array of 1-bit-wide unsigned integers to also be summed up.
+Finally, we have:
+.. code-block::
+	:force:
+	Y = port0factor1 * port0factor2 + port1factor1 * port1factor2 + ...
+		* B[0] + B[1] + ...
+
 Specify rules
 ~~~~~~~~~~~~~
 
@@ -1152,4 +1198,4 @@ file via ABC using the abc pass.
 
 .. todo:: Add information about ``$lut`` and ``$sop`` cells.
 
-.. todo:: Add information about ``$alu``, ``$macc``, ``$fa``, and ``$lcu`` cells.
+.. todo:: Add information about ``$alu``, ``$fa``, and ``$lcu`` cells.
diff --git a/techlibs/common/simlib.v b/techlibs/common/simlib.v
index 489281f26..1383a2a13 100644
--- a/techlibs/common/simlib.v
+++ b/techlibs/common/simlib.v
@@ -902,18 +902,29 @@ endgenerate
 endmodule
 
 // --------------------------------------------------------
-
+//  |---v---|---v---|---v---|---v---|---v---|---v---|---v---|---v---|---v---|---v---|
+//-
+//-     $macc (A, B, Y)
+//-
+//- Multiply and accumulate.
+//- A building block for summing any number of negated and unnegated signals and arithmetic products of pairs of signals. Cell port A concatenates pairs of signals to be multiplied together. When the second signal in a pair is zero length, a constant 1 is used instead as the second factor. Cell port B concatenates 1-bit-wide signals to also be summed, such as "carry in" in adders.
+//- Typically created by the `alumacc` pass, which transforms $add and $mul into $macc cells.
 module \$macc (A, B, Y);
 
 parameter A_WIDTH = 0;
 parameter B_WIDTH = 0;
 parameter Y_WIDTH = 0;
+// CONFIG determines the layout of A, as explained below
 parameter CONFIG = 4'b0000;
 parameter CONFIG_WIDTH = 4;
 
-input [A_WIDTH-1:0] A;
-input [B_WIDTH-1:0] B;
-output reg [Y_WIDTH-1:0] Y;
+// The term "port" is used with two different meanings for this cell. To disambiguate:
+// A cell port is for example the A input (it is constructed in C++ as cell->setPort(ID::A, ...))
+// Multiplier ports are pairs of multiplier inputs ("factors").
+// If the second signal in such a pair is zero length, no multiplication is necessary, and the first signal is just added to the sum.
+input [A_WIDTH-1:0] A; // Cell port A is the concatenation of all arithmetic ports
+input [B_WIDTH-1:0] B; // Cell port B is the concatenation of single-bit unsigned signals to be also added to the sum
+output reg [Y_WIDTH-1:0] Y; // Output sum
 
 // Xilinx XSIM does not like $clog2() below..
 function integer my_clog2;
@@ -929,10 +940,42 @@ function integer my_clog2;
 	end
 endfunction
 
+// Number of bits in each factor length field of CONFIG (one length field per factor in cell port A)
 localparam integer num_bits = CONFIG[3:0] > 0 ? CONFIG[3:0] : 1;
+// Number of multiplier ports
 localparam integer num_ports = (CONFIG_WIDTH-4) / (2 + 2*num_bits);
+// Minimum bit width of an induction variable to iterate over all bits of cell port A
 localparam integer num_abits = my_clog2(A_WIDTH) > 0 ? my_clog2(A_WIDTH) : 1;
 
+// In this pseudocode, u(foo) means an unsigned int that's foo bits long.
+// The CONFIG parameter carries the following information:
+//	struct CONFIG {
+//		u4 num_bits;
+//		struct port_field {
+//			bool is_signed;
+//			bool is_subtract;
+//			u(num_bits) factor1_len;
+//			u(num_bits) factor2_len;
+//		}[num_ports];
+//	};
+
+// The A cell port carries the following information:
+//	struct A {
+//		u(CONFIG.port_field[0].factor1_len) port0factor1;
+//		u(CONFIG.port_field[0].factor2_len) port0factor2;
+//		u(CONFIG.port_field[1].factor1_len) port1factor1;
+//		u(CONFIG.port_field[1].factor2_len) port1factor2;
+//		...
+//	};
+// and num_abits is my_clog2(A_WIDTH), with a minimum of 1.
+// No factor1 may have a zero length.
+// A factor2 having a zero length implies factor2 is replaced with a constant 1.
+
+// Additionally, B is an array of 1-bit-wide unsigned integers to also be summed up.
+// Finally, we have:
+// Y = port0factor1 * port0factor2 + port1factor1 * port1factor2 + ...
+//     + B[0] + B[1] + ...
+
 function [2*num_ports*num_abits-1:0] get_port_offsets;
 	input [CONFIG_WIDTH-1:0] cfg;
 	integer i, cursor;

From 9510293a9472a239f448cc7dc7b1bed7f1a35e36 Mon Sep 17 00:00:00 2001
From: "Emil J. Tywoniak" <emil@tywoniak.eu>
Date: Thu, 4 Apr 2024 18:16:58 +0200
Subject: [PATCH 2/3] fixup

---
 docs/source/yosys_internals/formats/cell_library.rst | 11 +++++++----
 techlibs/common/simlib.v                             |  9 +++++++--
 2 files changed, 14 insertions(+), 6 deletions(-)

diff --git a/docs/source/yosys_internals/formats/cell_library.rst b/docs/source/yosys_internals/formats/cell_library.rst
index a4e5adfb7..1e0012f46 100644
--- a/docs/source/yosys_internals/formats/cell_library.rst
+++ b/docs/source/yosys_internals/formats/cell_library.rst
@@ -632,8 +632,9 @@ If the second signal in such a pair is zero length, no multiplication is necessa
 
 In this pseudocode, ``u(foo)`` means an unsigned int that's foo bits long.
 The CONFIG parameter carries the following information:
+
 .. code-block::
-	:force:
+
 	struct CONFIG {
 		u4 num_bits;
 		struct port_field {
@@ -645,8 +646,9 @@ The CONFIG parameter carries the following information:
 	};
 
 The A cell port carries the following information:
+
 .. code-block::
-	:force:
+
 	struct A {
 		u(CONFIG.port_field[0].factor1_len) port0factor1;
 		u(CONFIG.port_field[0].factor2_len) port0factor2;
@@ -660,10 +662,11 @@ A factor2 having a zero length implies factor2 is replaced with a constant 1.
 
 Additionally, B is an array of 1-bit-wide unsigned integers to also be summed up.
 Finally, we have:
+
 .. code-block::
-	:force:
+
 	Y = port0factor1 * port0factor2 + port1factor1 * port1factor2 + ...
-		* B[0] + B[1] + ...
+	     + B[0] + B[1] + ...
 
 Specify rules
 ~~~~~~~~~~~~~
diff --git a/techlibs/common/simlib.v b/techlibs/common/simlib.v
index 1383a2a13..7dc03da6d 100644
--- a/techlibs/common/simlib.v
+++ b/techlibs/common/simlib.v
@@ -907,8 +907,13 @@ endmodule
 //-     $macc (A, B, Y)
 //-
 //- Multiply and accumulate.
-//- A building block for summing any number of negated and unnegated signals and arithmetic products of pairs of signals. Cell port A concatenates pairs of signals to be multiplied together. When the second signal in a pair is zero length, a constant 1 is used instead as the second factor. Cell port B concatenates 1-bit-wide signals to also be summed, such as "carry in" in adders.
-//- Typically created by the `alumacc` pass, which transforms $add and $mul into $macc cells.
+//- A building block for summing any number of negated and unnegated signals
+//- and arithmetic products of pairs of signals. Cell port A concatenates pairs
+//- of signals to be multiplied together. When the second signal in a pair is zero
+//- length, a constant 1 is used instead as the second factor. Cell port B
+//- concatenates 1-bit-wide signals to also be summed, such as "carry in" in adders.
+//- Typically created by the `alumacc` pass, which transforms $add and $mul
+//- into $macc cells.
 module \$macc (A, B, Y);
 
 parameter A_WIDTH = 0;

From 43ef916f8683e68bbaee9f2358f3719d5000bf1a Mon Sep 17 00:00:00 2001
From: "Emil J. Tywoniak" <emil@tywoniak.eu>
Date: Fri, 5 Apr 2024 14:01:25 +0200
Subject: [PATCH 3/3] Restructure rst

---
 .../yosys_internals/formats/cell_library.rst  | 51 ++++++++-----------
 1 file changed, 22 insertions(+), 29 deletions(-)

diff --git a/docs/source/yosys_internals/formats/cell_library.rst b/docs/source/yosys_internals/formats/cell_library.rst
index 1e0012f46..2b8dc3001 100644
--- a/docs/source/yosys_internals/formats/cell_library.rst
+++ b/docs/source/yosys_internals/formats/cell_library.rst
@@ -622,22 +622,36 @@ Add a brief description of the ``$fsm`` cell type.
 Coarse arithmetics
 ~~~~~~~~~~~~~~~~~~~~~
 
-The ``$macc`` cell type represents a multiply and accumulate block, for summing any number of negated and unnegated signals and arithmetic products of pairs of signals. Cell port A concatenates pairs of signals to be multiplied together. When the second signal in a pair is zero length, a constant 1 is used instead as the second factor. Cell port B concatenates 1-bit-wide signals to also be summed, such as "carry in" in adders. 
+The ``$macc`` cell type represents a generalized multiply and accumulate operation. The cell is purely combinational. It outputs the result of summing up a sequence of products and other injected summands.
 
-The cell's ``CONFIG`` parameter determines the layout of cell port ``A``.
-In the terms used for this cell, there's mixed meanings for the term "port". To disambiguate:
-A cell port is for example the A input (it is constructed in C++ as ``cell->setPort(ID::A, ...))``
-Multiplier ports are pairs of multiplier inputs ("factors").
-If the second signal in such a pair is zero length, no multiplication is necessary, and the first signal is just added to the sum.
+.. code-block::
+
+	Y = 0 +- a0factor1 * a0factor2 +- a1factor1 * a1factor2 +- ...
+	     + B[0] + B[1] + ...
+
+The A port consists of concatenated pairs of multiplier inputs ("factors").
+A zero length factor2 acts as a constant 1, turning factor1 into a simple summand.
 
 In this pseudocode, ``u(foo)`` means an unsigned int that's foo bits long.
+
+.. code-block::
+
+	struct A {
+		u(CONFIG.mul_info[0].factor1_len) a0factor1;
+		u(CONFIG.mul_info[0].factor2_len) a0factor2;
+		u(CONFIG.mul_info[1].factor1_len) a1factor1;
+		u(CONFIG.mul_info[1].factor2_len) a1factor2;
+		...
+	};
+
+The cell's ``CONFIG`` parameter determines the layout of cell port ``A``.
 The CONFIG parameter carries the following information:
 
 .. code-block::
 
 	struct CONFIG {
 		u4 num_bits;
-		struct port_field {
+		struct mul_info {
 			bool is_signed;
 			bool is_subtract;
 			u(num_bits) factor1_len;
@@ -645,28 +659,7 @@ The CONFIG parameter carries the following information:
 		}[num_ports];
 	};
 
-The A cell port carries the following information:
-
-.. code-block::
-
-	struct A {
-		u(CONFIG.port_field[0].factor1_len) port0factor1;
-		u(CONFIG.port_field[0].factor2_len) port0factor2;
-		u(CONFIG.port_field[1].factor1_len) port1factor1;
-		u(CONFIG.port_field[1].factor2_len) port1factor2;
-		...
-	};
-
-No factor1 may have a zero length.
-A factor2 having a zero length implies factor2 is replaced with a constant 1.
-
-Additionally, B is an array of 1-bit-wide unsigned integers to also be summed up.
-Finally, we have:
-
-.. code-block::
-
-	Y = port0factor1 * port0factor2 + port1factor1 * port1factor2 + ...
-	     + B[0] + B[1] + ...
+B is an array of concatenated 1-bit-wide unsigned integers to also be summed up.
 
 Specify rules
 ~~~~~~~~~~~~~