diff --git a/kernel/functionalir.cc b/kernel/functionalir.cc
index 6983facfb..b8fa6b88e 100644
--- a/kernel/functionalir.cc
+++ b/kernel/functionalir.cc
@@ -226,26 +226,36 @@ public:
 			T b = extend(inputs.at(ID(B)), b_width, width, is_signed);
 			if(is_signed) {
 				if(cellType == ID($div)) {
+					// divide absolute values, then flip the sign if input signs differ
+					// but extend the width first, to handle the case (most negative value) / (-1)
 					T abs_y = factory.unsigned_div(abs(a, width), abs(b, width), width);
 					T out_sign = factory.not_equal(sign(a, width), sign(b, width), 1);
-					return neg_if(extend(abs_y, width, y_width, true), y_width, out_sign);
+					return neg_if(extend(abs_y, width, y_width, false), y_width, out_sign);
 				} else if(cellType == ID($mod)) {
+					// similar to division but output sign == dividend sign (the sign of a)
 					T abs_y = factory.unsigned_mod(abs(a, width), abs(b, width), width);
-					return neg_if(extend(abs_y, width, y_width, true), y_width, sign(a, width));
+					return neg_if(extend(abs_y, width, y_width, false), y_width, sign(a, width));
 				} else if(cellType == ID($divfloor)) {
+					// if b is negative, flip both signs so that b is positive
 					T b_sign = sign(b, width);
 					T a1 = neg_if(a, width, b_sign);
 					T b1 = neg_if(b, width, b_sign);
-					T a1_sign = sign(a1, width);
+					// if a is now negative, calculate ~((~a) / b) = -((-a - 1) / b + 1)
+					// which equals the negative of (-a) / b with rounding up rather than down
+					// note that to handle the case where a = most negative value properly,
+					// we have to calculate a1_sign from the original values rather than using sign(a1, width)
+					T a1_sign = factory.bitwise_and(factory.not_equal(sign(a, width), sign(b, width), 1), reduce_or(a, width), 1);
 					T a2 = factory.mux(a1, factory.bitwise_not(a1, width), a1_sign, width);
 					T y1 = factory.unsigned_div(a2, b1, width);
-					T y2 = factory.mux(y1, factory.bitwise_not(y1, width), a1_sign, width);
-					return extend(y2, width, y_width, true);
+					T y2 = extend(y1, width, y_width, false);
+					return factory.mux(y2, factory.bitwise_not(y2, y_width), a1_sign, y_width);
 				} else if(cellType == ID($modfloor)) {
+					// calculate |a| % |b| and then subtract from |b| if input signs differ and the remainder is non-zero
 					T abs_b = abs(b, width);
 					T abs_y = factory.unsigned_mod(abs(a, width), abs_b, width);
 					T flip_y = factory.bitwise_and(factory.bitwise_xor(sign(a, width), sign(b, width), 1), factory.reduce_or(abs_y, width), 1);
 					T y_flipped = factory.mux(abs_y, factory.sub(abs_b, abs_y, width), flip_y, width);
+					// since y_flipped is strictly less than |b|, its top bit is always 0, so negating it cannot overflow and we can just sign extend the result
 					T y = neg_if(y_flipped, width, sign(b, b_width));
 					return extend(y, width, y_width, true);
 				} else
@@ -261,22 +271,8 @@ public:
 		} else if (cellType == ID($lut)) {
 			int width = parameters.at(ID(WIDTH)).as_int();
 			Const lut_table = parameters.at(ID(LUT));
-			T a = inputs.at(ID(A));
-			// Output initialization
-			T y = factory.constant(Const(0, 1));
-			// Iterate over each possible input combination
-			for (int i = 0; i < (1 << width); ++i) {
-				// Create a constant representing the value of i
-				T i_val = factory.constant(Const(i, width));
-				// Check if the input matches this value
-				T match = factory.equal(a, i_val, width);
-				// Get the corresponding LUT value
-				bool lut_val = lut_table.bits[i] == State::S1;
-				T lut_output = factory.constant(Const(lut_val, 1));
-				// Use a multiplexer to select the correct output based on the match
-				y = factory.mux(y, lut_output, match, 1);
-			}
-			return y;
+			lut_table.extu(1 << width);
+			return handle_bmux(factory.constant(lut_table), inputs.at(ID(A)), 1 << width, 0, 1, width, width);
 		} else if (cellType == ID($bwmux)) {
 			int width = parameters.at(ID(WIDTH)).as_int();
 			T a = inputs.at(ID(A));
@@ -526,7 +522,7 @@ void FunctionalIR::topological_sort() {
     if(scc) log_error("combinational loops, aborting\n");
 }
 
-IdString merge_name(IdString a, IdString b) {
+static IdString merge_name(IdString a, IdString b) {
 	if(a[0] == '$' && b[0] == '\\')
 		return b;
 	else
diff --git a/tests/functional/conftest.py b/tests/functional/conftest.py
index 3a66abaab..12db7f1d0 100644
--- a/tests/functional/conftest.py
+++ b/tests/functional/conftest.py
@@ -1,14 +1,30 @@
 import pytest
 from rtlil_cells import generate_test_cases
+import random
+
+random_seed = random.getrandbits(32)
 
 def pytest_addoption(parser):
-    parser.addoption(
-        "--per-cell", type=int, default=None, help="run only N tests per cell"
-    )
+    parser.addoption("--per-cell", type=int, default=None, help="run only N tests per cell")
+    parser.addoption("--steps", type=int, default=1000, help="run each test for N steps")
+    parser.addoption("--seed", type=int, default=random_seed, help="seed for random number generation, use random seed if unspecified")
+
+def pytest_collection_finish(session):
+    print('random seed: {}'.format(session.config.getoption("seed")))
+
+@pytest.fixture
+def num_steps(request):
+    return request.config.getoption("steps")
+
+@pytest.fixture
+def rnd(request):
+    seed1 = request.config.getoption("seed")
+    return lambda seed2: random.Random('{}-{}'.format(seed1, seed2))
 
 def pytest_generate_tests(metafunc):
     if "cell" in metafunc.fixturenames:
-        print(dir(metafunc.config))
         per_cell = metafunc.config.getoption("per_cell", default=None)
-        names, cases = generate_test_cases(per_cell)
+        seed1 = metafunc.config.getoption("seed")
+        rnd = lambda seed2: random.Random('{}-{}'.format(seed1, seed2))
+        names, cases = generate_test_cases(per_cell, rnd)
         metafunc.parametrize("cell,parameters", cases, ids=names)
\ No newline at end of file
diff --git a/tests/functional/rtlil_cells.py b/tests/functional/rtlil_cells.py
index e96baa18e..b4c0ad1c1 100644
--- a/tests/functional/rtlil_cells.py
+++ b/tests/functional/rtlil_cells.py
@@ -1,30 +1,6 @@
 from itertools import chain
 import random
 
-widths = [
-    (16, 32, 48, True),
-    (16, 32, 48, False),
-    (32, 16, 48, True),
-    (32, 16, 48, False),
-    (32, 32, 16, True),
-    (32, 32, 16, False)
-]
-
-shift_widths = [
-    (32, 6, 32, True, False),
-    (32, 6, 32, False, False),
-    (32, 6, 64, True, False),
-    (32, 6, 64, False, False),
-    (32, 32, 16, True, False),
-    (32, 32, 16, False, False),
-    (32, 6, 32, True, True),
-    (32, 6, 32, False, True),
-    (32, 6, 64, True, True),
-    (32, 6, 64, False, True),
-    (32, 32, 16, True, True),
-    (32, 32, 16, False, True),
-]
-
 def write_rtlil_cell(f, cell_type, inputs, outputs, parameters):
     f.write('autoidx 1\n')
     f.write('module \\gold\n')
@@ -37,207 +13,260 @@ def write_rtlil_cell(f, cell_type, inputs, outputs, parameters):
         idx += 1
     f.write(f'\tcell ${cell_type} \\UUT\n')
     for (name, value) in parameters.items():
-        f.write(f'\t\tparameter \\{name} {value}\n')
+        if value >= 2**32:
+            f.write(f'\t\tparameter \\{name} {value.bit_length()}\'{value:b}\n')
+        else:
+            f.write(f'\t\tparameter \\{name} {value}\n')
     for name in chain(inputs.keys(), outputs.keys()):
         f.write(f'\t\tconnect \\{name} \\{name}\n')
     f.write(f'\tend\nend\n')
 
 class BaseCell:
-    def __init__(self, name):
+    def __init__(self, name, parameters, inputs, outputs, test_values):
         self.name = name
+        self.parameters = parameters
+        self.inputs = inputs
+        self.outputs = outputs
+        self.test_values = test_values
+    def get_port_width(self, port, parameters):
+        def parse_specifier(spec):
+            if isinstance(spec, int):
+                return spec
+            if isinstance(spec, str):
+                return parameters[spec]
+            if callable(spec):
+                return spec(parameters)
+            assert False, "expected int, str or lambda"
+        if port in self.inputs:
+            return parse_specifier(self.inputs[port])
+        elif port in self.outputs:
+            return parse_specifier(self.outputs[port])
+        else:
+            assert False, "expected input or output"
+    def generate_tests(self, rnd):
+        def print_parameter(v):
+            if isinstance(v, bool):
+                return "S" if v else "U"
+            else:
+                return str(v)
+        for values in self.test_values:
+            if isinstance(values, int):
+                values = [values]
+            name = '-'.join([print_parameter(v) for v in values])
+            parameters = {parameter: int(values[i]) for i, parameter in enumerate(self.parameters)}
+            if self.is_test_valid(values):
+                yield (name, parameters)
+    def write_rtlil_file(self, path, parameters):
+        inputs = {port: self.get_port_width(port, parameters) for port in self.inputs}
+        outputs = {port: self.get_port_width(port, parameters) for port in self.outputs}
+        with open(path, 'w') as f:
+            write_rtlil_cell(f, self.name, inputs, outputs, parameters)
+    def is_test_valid(self, values):
+        return True
 
 class UnaryCell(BaseCell):
-    def __init__(self, name):
-        super().__init__(name)
-    def generate_tests(self):
-        for (a_width, _, y_width, signed) in widths:
-            yield (f'{a_width}-{y_width}-{'S' if signed else 'U'}',
-                   {'A_WIDTH' : a_width,
-                    'A_SIGNED' : int(signed),
-                    'Y_WIDTH' : y_width})
-    def write_rtlil_file(self, f, parameters):
-        write_rtlil_cell(f, self.name, {'A': parameters['A_WIDTH']}, {'Y': parameters['Y_WIDTH']}, parameters)
+    def __init__(self, name, values):
+        super().__init__(name, ['A_WIDTH', 'Y_WIDTH', 'A_SIGNED'], {'A': 'A_WIDTH'}, {'Y': 'Y_WIDTH'}, values)
 
 class BinaryCell(BaseCell):
-    def __init__(self, name):
-        super().__init__(name)
-    def generate_tests(self):
-        for (a_width, b_width, y_width, signed) in widths:
-            yield (f'{a_width}-{b_width}-{y_width}-{'S' if signed else 'U'}',
-                   {'A_WIDTH' : a_width,
-                    'A_SIGNED' : int(signed),
-                    'B_WIDTH' : b_width,
-                    'B_SIGNED' : int(signed),
-                    'Y_WIDTH' : y_width})
-    def write_rtlil_file(self, f, parameters):
-        write_rtlil_cell(f, self.name, {'A': parameters['A_WIDTH'], 'B': parameters['B_WIDTH']}, {'Y': parameters['Y_WIDTH']}, parameters)
+    def __init__(self, name, values):
+        super().__init__(name, ['A_WIDTH', 'B_WIDTH', 'Y_WIDTH', 'A_SIGNED', 'B_SIGNED'], {'A': 'A_WIDTH', 'B': 'B_WIDTH'}, {'Y': 'Y_WIDTH'}, values)
 
 class ShiftCell(BaseCell):
-    def __init__(self, name):
-        super().__init__(name)
-    def generate_tests(self):
-        for (a_width, b_width, y_width, a_signed, b_signed) in shift_widths:
-            if not self.name in ('shift', 'shiftx') and b_signed: continue
-            if self.name == 'shiftx' and a_signed: continue
-            yield (f'{a_width}-{b_width}-{y_width}-{'S' if a_signed else 'U'}{'S' if b_signed else 'U'}',
-                   {'A_WIDTH' : a_width,
-                    'A_SIGNED' : int(a_signed),
-                    'B_WIDTH' : b_width,
-                    'B_SIGNED' : int(b_signed),
-                    'Y_WIDTH' : y_width})
-    def write_rtlil_file(self, f, parameters):
-        write_rtlil_cell(f, self.name, {'A': parameters['A_WIDTH'], 'B': parameters['B_WIDTH']}, {'Y': parameters['Y_WIDTH']}, parameters)
+    def __init__(self, name, values):
+        super().__init__(name,  ['A_WIDTH', 'B_WIDTH', 'Y_WIDTH', 'A_SIGNED', 'B_SIGNED'], {'A': 'A_WIDTH', 'B': 'B_WIDTH'}, {'Y': 'Y_WIDTH'}, values)
+    def is_test_valid(self, values):
+        (a_width, b_width, y_width, a_signed, b_signed) = values
+        if not self.name in ('shift', 'shiftx') and b_signed: return False
+        if self.name == 'shiftx' and a_signed: return False
+        return True
 
 class MuxCell(BaseCell):
-    def __init__(self, name):
-        super().__init__(name)
-    def generate_tests(self):
-        for width in [10, 20, 40]:
-            yield (f'{width}', {'WIDTH' : width})
-    def write_rtlil_file(self, f, parameters):
-        write_rtlil_cell(f, self.name, {'A': parameters['WIDTH'], 'B': parameters['WIDTH'], 'S': 1}, {'Y': parameters['WIDTH']}, parameters)
+    def __init__(self, name, values):
+        super().__init__(name, ['WIDTH'], {'A': 'WIDTH', 'B': 'WIDTH', 'S': 1}, {'Y': 'WIDTH'}, values)
 
 class BWCell(BaseCell):
-    def __init__(self, name):
-        super().__init__(name)
-    def generate_tests(self):
-        for width in [10, 20, 40]:
-            yield (f'{width}', {'WIDTH' : width})
-    def write_rtlil_file(self, f, parameters):
-        inputs = {'A': parameters['WIDTH'], 'B': parameters['WIDTH']}
-        if self.name == "bwmux": inputs['S'] = parameters['WIDTH']
-        write_rtlil_cell(f, self.name, inputs, {'Y': parameters['WIDTH']}, parameters)
+    def __init__(self, name, values):
+        inputs = {'A': 'WIDTH', 'B': 'WIDTH'}
+        if name == "bwmux": inputs['S'] = 'WIDTH'
+        super().__init__(name, ['WIDTH'], inputs, {'Y': 'WIDTH'}, values)
 
 class PMuxCell(BaseCell):
-    def __init__(self, name):
-        super().__init__(name)
-    def generate_tests(self):
-        for (width, s_width) in [(10, 1), (10, 4), (20, 4)]:
-            yield (f'{width}-{s_width}',
-                   {'WIDTH' : width,
-                    'S_WIDTH' : s_width})
-    def write_rtlil_file(self, f, parameters):
-        s_width = parameters['S_WIDTH']
-        b_width = parameters['WIDTH'] * s_width
-        write_rtlil_cell(f, self.name, {'A': parameters['WIDTH'], 'B': b_width, 'S': s_width}, {'Y': parameters['WIDTH']}, parameters)
+    def __init__(self, name, values):
+        b_width = lambda par: par['WIDTH'] * par['S_WIDTH']
+        super().__init__(name, ['WIDTH', 'S_WIDTH'], {'A': 'WIDTH', 'B': b_width, 'S': 'S_WIDTH'}, {'Y': 'WIDTH'}, values)
 
 class BMuxCell(BaseCell):
-    def __init__(self, name):
-        super().__init__(name)
-    def generate_tests(self):
-        for (width, s_width) in [(10, 1), (10, 2), (10, 4)]:
-            yield (f'{width}-{s_width}', {'WIDTH' : width, 'S_WIDTH' : s_width})
-    def write_rtlil_file(self, f, parameters):
-        write_rtlil_cell(f, self.name, {'A': parameters['WIDTH'] << parameters['S_WIDTH'], 'S': parameters['S_WIDTH']}, {'Y': parameters['WIDTH']}, parameters)
+    def __init__(self, name, values):
+        a_width = lambda par: par['WIDTH'] << par['S_WIDTH']
+        super().__init__(name, ['WIDTH', 'S_WIDTH'], {'A': a_width, 'S': 'S_WIDTH'}, {'Y': 'WIDTH'}, values)
 
 class DemuxCell(BaseCell):
-    def __init__(self, name):
-        super().__init__(name)
-    def generate_tests(self):
-        for (width, s_width) in [(10, 1), (32, 2), (16, 4)]:
-            yield (f'{width}-{s_width}', {'WIDTH' : width, 'S_WIDTH' : s_width})
-    def write_rtlil_file(self, f, parameters):
-        write_rtlil_cell(f, self.name, {'A': parameters['WIDTH'], 'S': parameters['S_WIDTH']}, {'Y': parameters['WIDTH'] << parameters['S_WIDTH']}, parameters)
-
-def seeded_randint(seed, a, b):
-    r = random.getstate()
-    random.seed(seed)
-    n = random.randint(a, b)
-    random.setstate(r)
-    return n
+    def __init__(self, name, values):
+        y_width = lambda par: par['WIDTH'] << par['S_WIDTH']
+        super().__init__(name, ['WIDTH', 'S_WIDTH'], {'A': 'WIDTH', 'S': 'S_WIDTH'}, {'Y': y_width}, values)
 
 class LUTCell(BaseCell):
-    def __init__(self, name):
-        super().__init__(name)
-    def generate_tests(self):
-        for width in [4, 6, 8]:
-            lut = seeded_randint(width, 0, 2**width - 1)
+    def __init__(self, name, values):
+        super().__init__(name, ['WIDTH', 'LUT'], {'A': 'WIDTH'}, {'Y': 1}, values)
+    def generate_tests(self, rnd):
+        for width in self.test_values:
+            lut = rnd(f'lut-{width}').getrandbits(2**width)
             yield (f'{width}', {'WIDTH' : width, 'LUT' : lut})
-    def write_rtlil_file(self, f, parameters):
-        write_rtlil_cell(f, self.name, {'A': parameters['WIDTH']}, {'Y': 1}, parameters)
 
 class ConcatCell(BaseCell):
-    def __init__(self, name):
-        super().__init__(name)
-    def generate_tests(self):
-        for (a_width, b_width) in [(16, 16), (8, 14), (20, 10)]:
-            yield (f'{a_width}-{b_width}', {'A_WIDTH' : a_width, 'B_WIDTH' : b_width})
-    def write_rtlil_file(self, f, parameters):
-        write_rtlil_cell(f, self.name, {'A': parameters['A_WIDTH'], 'B' : parameters['B_WIDTH']}, {'Y': parameters['A_WIDTH'] + parameters['B_WIDTH']}, parameters)
+    def __init__(self, name, values):
+        y_width = lambda par: par['A_WIDTH'] + par['B_WIDTH']
+        super().__init__(name, ['A_WIDTH', 'B_WIDTH'], {'A': 'A_WIDTH', 'B': 'B_WIDTH'}, {'Y': y_width}, values)
 
 class SliceCell(BaseCell):
-    def __init__(self, name):
-        super().__init__(name)
-    def generate_tests(self):
-        for (a_width, offset, y_width) in [(32, 10, 15), (8, 0, 4), (10, 0, 10)]:
-            yield (f'{a_width}-{offset}-{y_width}', {'A_WIDTH' : a_width, 'OFFSET' : offset, 'Y_WIDTH': y_width})
-    def write_rtlil_file(self, f, parameters):
-        write_rtlil_cell(f, self.name, {'A': parameters['A_WIDTH']}, {'Y': parameters['Y_WIDTH']}, parameters)
+    def __init__(self, name, values):
+        super().__init__(name, ['A_WIDTH', 'OFFSET', 'Y_WIDTH'], {'A': 'A_WIDTH'}, {'Y': 'Y_WIDTH'}, values)
 
 class FailCell(BaseCell):
     def __init__(self, name):
-        super().__init__(name)
-    def generate_tests(self):
+        super().__init__(name, [], {}, {}, [])  # no parameters, ports or test values; BaseCell.__init__ requires all five arguments
+    def generate_tests(self, rnd):  # rnd is accepted to match the other cells' signature but is not used
         yield ('', {})
-    def write_rtlil_file(self, f, parameters):
+    def write_rtlil_file(self, path, parameters):  # always raises, so the single generated test reports the cell as unimplemented
         raise Exception(f'\'{self.name}\' cell unimplemented in test generator')
 
+class FFCell(BaseCell):  # flip-flop test cell: the RTLIL is produced by synthesizing a small Verilog module
+    def __init__(self, name, values):
+        super().__init__(name, ['WIDTH'], {'D': 'WIDTH'}, {'Q': 'WIDTH'}, values)  # ports as name -> width-spec dicts, as get_port_width expects
+    def write_rtlil_file(self, path, parameters):
+        from test_functional import yosys_synth  # imported inside the method; presumably avoids a circular import -- confirm
+        verilog_file = path.parent / 'verilog.v'  # Verilog source is written next to the requested RTLIL file
+        with open(verilog_file, 'w') as f:
+            f.write("""
+module gold(
+    input wire clk,
+    input wire [{0}:0] D,
+    output reg [{0}:0] Q
+);
+    always @(posedge clk)
+        Q <= D;
+endmodule""".format(parameters['WIDTH'] - 1))  # {0} is the MSB index, WIDTH - 1
+        yosys_synth(verilog_file, path)  # converts the Verilog file into the RTLIL file at `path`
+
+class MemCell(BaseCell):  # memory test cell: one write port (WA/WD) and one read port (RA/RD), synthesized from Verilog
+    def __init__(self, name, values):
+        super().__init__(name, ['DATA_WIDTH', 'ADDR_WIDTH'], {'WA': 'ADDR_WIDTH', 'RA': 'ADDR_WIDTH', 'WD': 'DATA_WIDTH'}, {'RD': 'DATA_WIDTH'}, values)
+    def write_rtlil_file(self, path, parameters):
+        from test_functional import yosys_synth  # imported inside the method; presumably avoids a circular import -- confirm
+        verilog_file = path.parent / 'verilog.v'  # Verilog source is written next to the requested RTLIL file
+        with open(verilog_file, 'w') as f:
+            f.write("""
+module gold(
+    input wire clk,
+    input wire [{1}:0] WA, RA,
+    input wire [{0}:0] WD,
+    output reg [{0}:0] RD
+);
+    reg [{0}:0] mem[0:{2}];
+    always @(*)
+        RD = mem[RA];
+    always @(posedge clk)
+        mem[WA] <= WD;
+endmodule""".format(parameters['DATA_WIDTH'] - 1, parameters['ADDR_WIDTH'] - 1, 2**parameters['ADDR_WIDTH'] - 1))  # {0} data MSB, {1} address MSB, {2} last memory index
+        yosys_synth(verilog_file, path)  # converts the Verilog file into the RTLIL file at `path`
+
+binary_widths = [
+    # try to cover extending A operand, extending B operand, extending/truncating result
+    (16, 32, 48, True, True),
+    (16, 32, 48, False, False),
+    (32, 16, 48, True, True),
+    (32, 16, 48, False, False),
+    (32, 32, 16, True, True),
+    (32, 32, 16, False, False),
+    # have at least one test that checks small inputs, which will exercise the corner cases more
+    (4, 4, 8, True, True),
+    (4, 4, 8, False, False)
+]
+
+unary_widths = [
+    (6, 12, True),
+    (6, 12, False),
+    (32, 16, True),
+    (32, 16, False)
+]
+
+# note that meaningless combinations of signedness are eliminated (see ShiftCell.is_test_valid),
+# e.g. most shift operations don't take signed shift amounts
+shift_widths = [
+    # one set of tests that definitely checks all possible shift amounts
+    # with a bigger result width to make sure it's not truncated
+    (32, 6, 64, True, False),
+    (32, 6, 64, False, False),
+    (32, 6, 64, True, True),
+    (32, 6, 64, False, True),
+    # one set that checks very oversized shifts
+    (32, 32, 64, True, False),
+    (32, 32, 64, False, False),
+    (32, 32, 64, True, True),
+    (32, 32, 64, False, True),
+    # at least one test where the result is going to be truncated
+    (32, 6, 16, False, False)
+]
+
 rtlil_cells = [
-    UnaryCell("not"),
-    UnaryCell("pos"),
-    UnaryCell("neg"),
-    BinaryCell("and"),
-    BinaryCell("or"),
-    BinaryCell("xor"),
-    BinaryCell("xnor"),
-    UnaryCell("reduce_and"),
-    UnaryCell("reduce_or"),
-    UnaryCell("reduce_xor"),
-    UnaryCell("reduce_xnor"),
-    UnaryCell("reduce_bool"),
-    ShiftCell("shl"),
-    ShiftCell("shr"),
-    ShiftCell("sshl"),
-    ShiftCell("sshr"),
-    ShiftCell("shift"),
-    ShiftCell("shiftx"),
+    UnaryCell("not", unary_widths),
+    UnaryCell("pos", unary_widths),
+    UnaryCell("neg", unary_widths),
+    BinaryCell("and", binary_widths),
+    BinaryCell("or", binary_widths),
+    BinaryCell("xor", binary_widths),
+    BinaryCell("xnor", binary_widths),
+    UnaryCell("reduce_and", unary_widths),
+    UnaryCell("reduce_or", unary_widths),
+    UnaryCell("reduce_xor", unary_widths),
+    UnaryCell("reduce_xnor", unary_widths),
+    UnaryCell("reduce_bool", unary_widths),
+    ShiftCell("shl", shift_widths),
+    ShiftCell("shr", shift_widths),
+    ShiftCell("sshl", shift_widths),
+    ShiftCell("sshr", shift_widths),
+    ShiftCell("shift", shift_widths),
+    ShiftCell("shiftx", shift_widths),
 #    ("fa", ["A", "B", "C", "X", "Y"]),
 #    ("lcu", ["P", "G", "CI", "CO"]),
 #    ("alu", ["A", "B", "CI", "BI", "X", "Y", "CO"]),
-    BinaryCell("lt"),
-    BinaryCell("le"),
-    BinaryCell("eq"),
-    BinaryCell("ne"),
-    BinaryCell("eqx"),
-    BinaryCell("nex"),
-    BinaryCell("ge"),
-    BinaryCell("gt"),
-    BinaryCell("add"),
-    BinaryCell("sub"),
-    BinaryCell("mul"),
+    BinaryCell("lt", binary_widths),
+    BinaryCell("le", binary_widths),
+    BinaryCell("eq", binary_widths),
+    BinaryCell("ne", binary_widths),
+    BinaryCell("eqx", binary_widths),
+    BinaryCell("nex", binary_widths),
+    BinaryCell("ge", binary_widths),
+    BinaryCell("gt", binary_widths),
+    BinaryCell("add", binary_widths),
+    BinaryCell("sub", binary_widths),
+    BinaryCell("mul", binary_widths),
 #    BinaryCell("macc"),
-    BinaryCell("div"),
-    BinaryCell("mod"),
-    BinaryCell("divfloor"),
-    BinaryCell("modfloor"),
-    BinaryCell("pow"),
-    UnaryCell("logic_not"),
-    BinaryCell("logic_and"),
-    BinaryCell("logic_or"),
-    SliceCell("slice"),
-    ConcatCell("concat"),
-    MuxCell("mux"),
-    BMuxCell("bmux"),
-    PMuxCell("pmux"),
-    DemuxCell("demux"),
-    LUTCell("lut"),
+    BinaryCell("div", binary_widths),
+    BinaryCell("mod", binary_widths),
+    BinaryCell("divfloor", binary_widths),
+    BinaryCell("modfloor", binary_widths),
+    BinaryCell("pow", binary_widths),
+    UnaryCell("logic_not", unary_widths),
+    BinaryCell("logic_and", binary_widths),
+    BinaryCell("logic_or", binary_widths),
+    SliceCell("slice", [(32, 10, 15), (8, 0, 4), (10, 0, 10)]),
+    ConcatCell("concat", [(16, 16), (8, 14), (20, 10)]),
+    MuxCell("mux", [10, 16, 40]),
+    BMuxCell("bmux", [(10, 1), (10, 2), (10, 4)]),
+    PMuxCell("pmux", [(10, 1), (10, 4), (20, 4)]),
+    DemuxCell("demux", [(10, 1), (32, 2), (16, 4)]),
+    LUTCell("lut", [4, 6, 8]),
 #    ("sop", ["A", "Y"]),
 #    ("tribuf", ["A", "EN", "Y"]),
 #    ("specify2", ["EN", "SRC", "DST"]),
 #    ("specify3", ["EN", "SRC", "DST", "DAT"]),
 #    ("specrule", ["EN_SRC", "EN_DST", "SRC", "DST"]),
-    BWCell("bweqx"),
-    BWCell("bwmux"),
+    BWCell("bweqx", [10, 16, 40]),
+    BWCell("bwmux", [10, 16, 40]),
+    FFCell("ff", [10, 20, 40]),
+    MemCell("mem", [(32, 4)])
 #    ("assert", ["A", "EN"]),
 #    ("assume", ["A", "EN"]),
 #    ("live", ["A", "EN"]),
@@ -260,12 +289,12 @@ rtlil_cells = [
 #    ("scopeinfo", []),
 ]
 
-def generate_test_cases(per_cell):
+def generate_test_cases(per_cell, rnd):
     tests = []
     names = []
     for cell in rtlil_cells:
         seen_names = set()
-        for (name, parameters) in cell.generate_tests():
+        for (name, parameters) in cell.generate_tests(rnd):
             if not name in seen_names:
                 seen_names.add(name)
                 tests.append((cell, parameters))
diff --git a/tests/functional/smt_vcd.py b/tests/functional/smt_vcd.py
index c38fe1bde..37d2a209f 100644
--- a/tests/functional/smt_vcd.py
+++ b/tests/functional/smt_vcd.py
@@ -1,6 +1,5 @@
 import sys
 import argparse
-import random
 import os
 import smtio
 import re
@@ -40,9 +39,10 @@ class SExprParser:
         rv, self.stack[0] = self.stack[0], []
         return rv
 
-def simulate_smt_with_smtio(smt_file_path, vcd_path, smt_io):
+def simulate_smt_with_smtio(smt_file_path, vcd_path, smt_io, num_steps, rnd):
     inputs = {}
     outputs = {}
+    states = {}
 
     def handle_datatype(lst):
         print(lst)
@@ -60,6 +60,14 @@ def simulate_smt_with_smtio(smt_file_path, vcd_path, smt_io):
                 bitvec_size = declaration[1][2]
                 assert output_name.startswith("gold_Outputs_")
                 outputs[output_name[len("gold_Outputs_"):]] = int(bitvec_size)
+        elif datatype_name.endswith("_State"):
+            for declaration in declarations:
+                state_name = declaration[0]
+                assert state_name.startswith("gold_State_")
+                if declaration[1][0] == "_":
+                    states[state_name[len("gold_State_"):]] = int(declaration[1][2])
+                else:
+                    states[state_name[len("gold_State_"):]] = (declaration[1][1][2], declaration[1][2][2])
 
     parser = SExprParser()
     with open(smt_file_path, 'r') as smt_file:
@@ -73,25 +81,44 @@ def simulate_smt_with_smtio(smt_file_path, vcd_path, smt_io):
     parser.finish()
     assert smt_io.check_sat() == 'sat'
 
+    def initial_state(states):  # returns the SMT commands that define test_state_step_n0 with every state element zeroed
+        mk_state_parts = []  # one constructor argument per state element, in dict order
+        rv = []  # SMT commands to emit, in order
+        for name, width in states.items():
+            if isinstance(width, int):  # an int width is emitted as a bit-vector literal
+                binary_string = format(0, '0{}b'.format(width))  # all-zero literal, `width` digits long
+                mk_state_parts.append(f"#b{binary_string}")
+            else:  # otherwise width is indexed as a pair: width[0] is the array index width, width[1] the element width
+                binary_string = format(0, '0{}b'.format(width[1]))  # all-zero element value
+                rv.append(f"(declare-const test_state_initial_mem_{name} (Array (_ BitVec {width[0]}) (_ BitVec {width[1]})))")
+                rv.append(f"(assert (forall ((i (_ BitVec {width[0]}))) (= (select test_state_initial_mem_{name} i) #b{binary_string})))")  # constrain every array entry to zero
+                mk_state_parts.append(f"test_state_initial_mem_{name}")
+        if len(states) == 0:  # no state elements: the constructor is used bare, without an application
+            mk_state_call = "gold_State"
+        else:
+            mk_state_call = "(gold_State {})".format(" ".join(mk_state_parts))
+        rv.append(f"(define-const test_state_step_n0 gold_State {mk_state_call})\n")  # step 0 starts from this state
+        return rv
+
     def set_step(inputs, step):
         # This function assumes 'inputs' is a dictionary like {"A": 5, "B": 4}
         # and 'input_values' is a dictionary like {"A": 5, "B": 13} specifying the concrete values for each input.
         
         mk_inputs_parts = []
         for input_name, width in inputs.items():
-            value = random.getrandbits(width)  # Generate a random number up to the maximum value for the bit size
+            value = rnd.getrandbits(width)  # Generate a random number up to the maximum value for the bit size
             binary_string = format(value, '0{}b'.format(width))  # Convert value to binary with leading zeros
             mk_inputs_parts.append(f"#b{binary_string}")
 
         mk_inputs_call = "gold_Inputs " + " ".join(mk_inputs_parts)
-        define_inputs = f"(define-const test_inputs_step_n{step} gold_Inputs ({mk_inputs_call}))\n"
+        return [
+            f"(define-const test_inputs_step_n{step} gold_Inputs ({mk_inputs_call}))\n",
+            f"(define-const test_results_step_n{step} (Pair gold_Outputs gold_State) (gold test_inputs_step_n{step} test_state_step_n{step}))\n",
+            f"(define-const test_outputs_step_n{step} gold_Outputs (first test_results_step_n{step}))\n",
+            f"(define-const test_state_step_n{step+1} gold_State (second test_results_step_n{step}))\n",
+        ]
 
-        define_outputs = f"(define-const test_outputs_step_n{step} gold_Outputs (first (gold test_inputs_step_n{step} gold_State)))\n"
-        smt_commands = [define_inputs, define_outputs]
-        return smt_commands
-
-    num_steps = 1000
-    smt_commands = []
+    smt_commands = initial_state(states)
     for step in range(num_steps):
         for step_command in set_step(inputs, step):
             smt_commands.append(step_command)
@@ -168,13 +195,13 @@ def simulate_smt_with_smtio(smt_file_path, vcd_path, smt_io):
 
     write_vcd(vcd_path, signals)
 
-def simulate_smt(smt_file_path, vcd_path):
+def simulate_smt(smt_file_path, vcd_path, num_steps, rnd):
     so = smtio.SmtOpts()
     so.solver = "z3"
-    so.logic = "BV"
+    so.logic = "ABV"
     so.debug_print = True
     smt_io = smtio.SmtIo(opts=so)
     try:
-        simulate_smt_with_smtio(smt_file_path, vcd_path, smt_io)
+        simulate_smt_with_smtio(smt_file_path, vcd_path, smt_io, num_steps, rnd)
     finally:
         smt_io.p_close()
\ No newline at end of file
diff --git a/tests/functional/test_functional.py b/tests/functional/test_functional.py
index 9b4fab970..86e515d90 100644
--- a/tests/functional/test_functional.py
+++ b/tests/functional/test_functional.py
@@ -6,11 +6,14 @@ from pathlib import Path
 
 base_path = Path(__file__).resolve().parent.parent.parent
 
+# quote a string or pathlib path so that it can be used by bash or yosys
+# TODO: is this really appropriate for yosys?
 def quote(path):
     return shlex.quote(str(path))
 
+# run a shell command and require the return code to be 0
 def run(cmd, **kwargs):
-    print(' '.join([shlex.quote(str(x)) for x in cmd]))
+    print(' '.join([quote(x) for x in cmd]))
     status = subprocess.run(cmd, **kwargs)
     assert status.returncode == 0, f"{cmd[0]} failed"
 
@@ -20,7 +23,24 @@ def yosys(script):
 def compile_cpp(in_path, out_path, args):
     run(['g++', '-g', '-std=c++17'] + args + [str(in_path), '-o', str(out_path)])
 
-def test_cxx(cell, parameters, tmp_path):
+def yosys_synth(verilog_file, rtlil_file):
+    yosys(f"read_verilog {quote(verilog_file)} ; prep ; clk2fflogic ; write_rtlil {quote(rtlil_file)}")
+
+# simulate an rtlil file with yosys, comparing with a given vcd file, and writing out the yosys simulation results into a second vcd file
+def yosys_sim(rtlil_file, vcd_reference_file, vcd_out_file):
+    try:
+        yosys(f"read_rtlil {quote(rtlil_file)}; sim -r {quote(vcd_reference_file)} -scope gold -vcd {quote(vcd_out_file)} -timescale 1us -sim-gold")
+    except:
+        # if yosys sim fails it's probably because of a simulation mismatch
+        # since yosys sim aborts on simulation mismatch to generate vcd output
+        # we have to re-run with a different set of flags
+        # on this run we ignore output and return code, we just want a best-effort attempt to get a vcd
+        subprocess.run([base_path / 'yosys', '-Q', '-p',
+            f'read_rtlil {quote(rtlil_file)}; sim -vcd {quote(vcd_out_file)} -a -r {quote(vcd_reference_file)} -scope gold -timescale 1us'],
+            capture_output=True, check=False)
+        raise
+
+def test_cxx(cell, parameters, tmp_path, num_steps, rnd):
     rtlil_file = tmp_path / 'rtlil.il'
     vcdharness_cc_file = base_path / 'tests/functional/vcd_harness.cc'
     cc_file = tmp_path / 'my_module_functional_cxx.cc'
@@ -28,20 +48,14 @@ def test_cxx(cell, parameters, tmp_path):
     vcd_functional_file = tmp_path / 'functional.vcd'
     vcd_yosys_sim_file = tmp_path / 'yosys.vcd'
 
-    with open(rtlil_file, 'w') as f:
-        cell.write_rtlil_file(f, parameters)
+    cell.write_rtlil_file(rtlil_file, parameters)
     yosys(f"read_rtlil {quote(rtlil_file)} ; write_functional_cxx {quote(cc_file)}")
     compile_cpp(vcdharness_cc_file, vcdharness_exe_file, ['-I', tmp_path, '-I', str(base_path / 'backends/functional/cxx_runtime')])
-    run([str(vcdharness_exe_file.resolve()), str(vcd_functional_file)])
-    try:
-        yosys(f"read_rtlil {quote(rtlil_file)}; sim -r {quote(vcd_functional_file)} -scope gold -vcd {quote(vcd_yosys_sim_file)} -timescale 1us -sim-gold")
-    except:
-        subprocess.run([base_path / 'yosys', '-Q', '-p',
-            f'read_rtlil {quote(rtlil_file)}; sim -vcd {quote(vcd_yosys_sim_file)} -r {quote(vcd_functional_file)} -scope gold -timescale 1us'],
-            capture_output=True, check=False)
-        raise
+    seed = str(rnd(cell.name + "-cxx").getrandbits(32))
+    run([str(vcdharness_exe_file.resolve()), str(vcd_functional_file), str(num_steps), str(seed)])
+    yosys_sim(rtlil_file, vcd_functional_file, vcd_yosys_sim_file)
 
-def test_smt(cell, parameters, tmp_path):
+def test_smt(cell, parameters, tmp_path, num_steps, rnd):
     import smt_vcd
 
     rtlil_file = tmp_path / 'rtlil.il'
@@ -49,15 +63,8 @@ def test_smt(cell, parameters, tmp_path):
     vcd_functional_file = tmp_path / 'functional.vcd'
     vcd_yosys_sim_file = tmp_path / 'yosys.vcd'
 
-    with open(rtlil_file, 'w') as f:
-        cell.write_rtlil_file(f, parameters)
+    cell.write_rtlil_file(rtlil_file, parameters)
     yosys(f"read_rtlil {quote(rtlil_file)} ; write_functional_smt2 {quote(smt_file)}")
-    run(['z3', smt_file])
-    smt_vcd.simulate_smt(smt_file, vcd_functional_file)
-    try:
-        yosys(f"read_rtlil {quote(rtlil_file)}; sim -r {quote(vcd_functional_file)} -scope gold -vcd {quote(vcd_yosys_sim_file)} -timescale 1us -sim-gold")
-    except:
-        subprocess.run([base_path / 'yosys', '-Q', '-p',
-            f'read_rtlil {quote(rtlil_file)}; sim -vcd {quote(vcd_yosys_sim_file)} -r {quote(vcd_functional_file)} -scope gold -timescale 1us'],
-            capture_output=True, check=False)
-        raise
\ No newline at end of file
+    run(['z3', smt_file]) # check if output is valid smtlib before continuing
+    smt_vcd.simulate_smt(smt_file, vcd_functional_file, num_steps, rnd(cell.name + "-smt"))
+    yosys_sim(rtlil_file, vcd_functional_file, vcd_yosys_sim_file)
\ No newline at end of file
diff --git a/tests/functional/vcd_harness.cc b/tests/functional/vcd_harness.cc
index f99d2909b..f01adf218 100644
--- a/tests/functional/vcd_harness.cc
+++ b/tests/functional/vcd_harness.cc
@@ -2,15 +2,46 @@
 #include <iostream>
 #include <fstream>
 #include <random>
+#include <ctype.h>
+#include <vector>
 
 #include "my_module_functional_cxx.cc"
 
+// Mangle a name for VCD output: whitespace becomes '_'; a backslash is prepended if the
+// name is empty, does not start with a letter or '_', or has a char outside [A-Za-z0-9_$].
+std::string vcd_name_mangle(std::string name) {
+  // <ctype.h> functions are undefined for negative char values, so go through unsigned char
+  auto uc = [](char c) { return static_cast<unsigned char>(c); };
+  bool escape = name.empty() || (!isalpha(uc(name[0])) && name[0] != '_');
+  for(char &c : name) {
+    if(isspace(uc(c))) c = '_';
+    if(!isalnum(uc(c)) && c != '_' && c != '$')
+      escape = true;
+  }
+  return escape ? "\\" + name : name;
+}
+std::unordered_map<std::string, std::string> codes; 
+
 struct DumpHeader {
   std::ofstream &ofs;
+  std::string code = "!";
   DumpHeader(std::ofstream &ofs) : ofs(ofs) {}
+  void increment_code() {
+    for(size_t i = 0; i < code.size(); i++)
+      if(code[i]++ == '~')
+        code[i] = '!';
+      else
+        return;
+    code.push_back('!');
+  }
   template <size_t n>
   void operator()(const char *name, Signal<n> value) {
-    ofs << "$var wire " << n << " " << name[0] << " " << name << " $end\n";
+    ofs << "$var wire " << n << " " << code << " " << vcd_name_mangle(name) << " $end\n";
+    codes[name] = code;
+    increment_code();
+  }
+  template <size_t n, size_t m>
+  void operator()(const char *name, Memory<n, m> value) {
   }
 };
 
@@ -22,14 +53,17 @@ struct Dump {
     // Bit
     if (n == 1) {
       ofs << (value[0] ? '1' : '0');
-      ofs << name[0] << "\n";
+      ofs << codes[name] << "\n";
       return;
     }
     // vector (multi-bit) signals
     ofs << "b";
     for (size_t i = n; i-- > 0;)
       ofs << (value[i] ? '1' : '0');
-    ofs << " " << name[0] << "\n";
+    ofs << " " << codes[name] << "\n";
+  }
+  template <size_t n, size_t m>
+  void operator()(const char *name, Memory<n, m> value) {
   }
 };
 
@@ -61,14 +95,15 @@ struct Randomize {
 
 int main(int argc, char **argv)
 {
-  if (argc != 2) {
-    std::cerr << "Usage: " << argv[0] << " <functional_vcd_filename>\n";
+  if (argc != 4) {
+    std::cerr << "Usage: " << argv[0] << " <functional_vcd_filename> <steps> <seed>\n";
     return 1;
   }
 
   const std::string functional_vcd_filename = argv[1];
+  const int steps = atoi(argv[2]);
+  const uint32_t seed = static_cast<uint32_t>(strtoul(argv[3], nullptr, 10)); // seed spans the full 32-bit range, which atoi's int result cannot represent
 
-  constexpr int steps = 1000;
   constexpr int number_timescale = 1;
   const std::string units_timescale = "us";
   gold::Inputs inputs;
@@ -87,27 +122,12 @@ int main(int argc, char **argv)
     state.visit(d);
   }
   vcd_file << "$enddefinitions $end\n$dumpvars\n";
-  vcd_file << "#0\n";
-  // Set all signals to false
+  std::mt19937 gen(seed);
+
   inputs.visit(Reset());
 
-  gold::eval(inputs, outputs, state, next_state);
-  {
-    Dump d(vcd_file);
-    inputs.visit(d);
-    outputs.visit(d);
-    state.visit(d);
-  }
-
-  // Initialize random number generator once
-  std::random_device rd;
-  std::mt19937 gen(rd());
-
   for (int step = 0; step < steps; ++step) {
-    // Functional backend cxx
-    vcd_file << "#" << (step + 1) << "\n";
-    inputs.visit(Randomize(gen));
-
+    vcd_file << "#" << step << "\n";
     gold::eval(inputs, outputs, state, next_state);
     {
       Dump d(vcd_file);
@@ -117,6 +137,7 @@ int main(int argc, char **argv)
     }
 
     state = next_state;
+    inputs.visit(Randomize(gen));
   }
 
   vcd_file.close();