diff --git a/Makefile b/Makefile
index 393408603..4933b6b18 100644
--- a/Makefile
+++ b/Makefile
@@ -888,6 +888,7 @@ SH_TEST_DIRS += tests/bram
 SH_TEST_DIRS += tests/svinterfaces
 SH_TEST_DIRS += tests/xprop
 SH_TEST_DIRS += tests/select
+SH_TEST_DIRS += tests/peepopt
 SH_TEST_DIRS += tests/proc
 SH_TEST_DIRS += tests/blif
 SH_TEST_DIRS += tests/arch
diff --git a/passes/opt/Makefile.inc b/passes/opt/Makefile.inc
index b796535e3..08d8191c7 100644
--- a/passes/opt/Makefile.inc
+++ b/passes/opt/Makefile.inc
@@ -32,6 +32,7 @@ PEEPOPT_PATTERN = passes/opt/peepopt_shiftmul_right.pmg
 PEEPOPT_PATTERN += passes/opt/peepopt_shiftmul_left.pmg
 PEEPOPT_PATTERN += passes/opt/peepopt_shiftadd.pmg
 PEEPOPT_PATTERN += passes/opt/peepopt_muldiv.pmg
+PEEPOPT_PATTERN += passes/opt/peepopt_muldiv_c.pmg
 PEEPOPT_PATTERN += passes/opt/peepopt_formal_clockgateff.pmg
 
 passes/opt/peepopt_pm.h: passes/pmgen/pmgen.py $(PEEPOPT_PATTERN)
diff --git a/passes/opt/peepopt.cc b/passes/opt/peepopt.cc
index faacfb304..fa7cf74a0 100644
--- a/passes/opt/peepopt.cc
+++ b/passes/opt/peepopt.cc
@@ -29,6 +29,14 @@ bool did_something;
 // scratchpad configurations for pmgen
 int shiftadd_max_ratio;
 
+// Helper function, removes the constant-0 bits at the LSB end of sig; the MSB is always kept, even when it is 0
+SigSpec remove_bottom_padding(SigSpec sig)
+{
+	int i = 0;
+	for (; i < sig.size() - 1 && sig[i] == State::S0; i++);
+	return sig.extract(i, sig.size() - i);
+}
+
 #include "passes/opt/peepopt_pm.h"
 
 struct PeepoptPass : public Pass {
@@ -45,6 +53,8 @@ struct PeepoptPass : public Pass {
 		log("\n");
 		log(" * muldiv - Replace (A*B)/B with A\n");
 		log("\n");
+		log(" * muldiv_c - Replace (A*B)/C with A*(B/C) when B and C are constants and B is divisible by C.\n");
+		log("\n");
 		log(" * shiftmul - Replace A>>(B*C) with A'>>(B<div into const->mul when b and c are divisible constants:
+// y = (a * b_const) / c_const ===> a * eval(b_const / c_const)
+//
+
+state <SigSpec> a b_const mul_y
+
+match mul
+	// Select multiplier
+	select mul->type == $mul
+endmatch
+
+code a b_const mul_y
+	// Get multiplier signals
+	a = port(mul, \A);
+	b_const = port(mul, \B);
+	mul_y = port(mul, \Y);
+
+	// Fanout of each multiplier Y bit should be 1 (no bit-split)
+	if (nusers(mul_y) != 2)
+		reject;
+
+	// A and B can be interchanged
+	branch;
+	std::swap(a, b_const);
+endcode
+
+match div
+	// Select div of form (a * b_const) / c_const
+	select div->type == $div
+
+	// Check that b_const and c_const is constant
+	filter b_const.is_fully_const()
+	filter port(div, \B).is_fully_const()
+	index <SigSpec> remove_bottom_padding(port(div, \A)) === mul_y
+endmatch
+
+code
+{
+	// The body lives in its own scope: reject leaves the block early, and such a jump
+	// must never bypass the initialization of a local that is declared further down.
+
+	// Get div signals
+	SigSpec div_a = port(div, \A);
+	SigSpec c_const = port(div, \B);
+	SigSpec div_y = port(div, \Y);
+
+	// Get offset of multiplier result chunk in divider
+	int offset = GetSize(div_a) - GetSize(mul_y);
+
+	// Check that there are only zeros before offset.
+	// This has to happen before offset is used in any arithmetic below.
+	if (offset < 0 || !div_a.extract(0, offset).is_fully_zero())
+		reject;
+
+	// Constants must not be wider than 32 bits
+	// This is because they are read through as_int(), which yields a 32 bit int
+	// FIXME: use BigInteger to make pass work with >32 bits
+	// NOTE(review): the first check measures the width of the parameter constant itself,
+	// not the B_WIDTH value - confirm that this is the intent
+	if (GetSize(mul->getParam(ID::B_WIDTH)) > 32)
+		reject;
+	if (GetSize(b_const) > 32)
+		reject;
+	if (GetSize(c_const) + offset > 32)
+		reject;
+
+	// Get properties of b_const and c_const
+	// b_const may be coming from the A port
+	// But it is an RTLIL invariant that A_SIGNED equals B_SIGNED
+	bool b_const_signed = mul->getParam(ID::B_SIGNED).as_bool();
+	bool c_const_signed = div->getParam(ID::B_SIGNED).as_bool();
+
+	// The divider has to interpret the product with the signedness the multiplier
+	// produced it with, otherwise a product with its MSB set changes value at the
+	// divider input and a * (b / c) is no longer equivalent.
+	if (b_const_signed != c_const_signed)
+		reject;
+
+	// Read a constant of at most 32 bits as its mathematical value.
+	// as_int(true) sign-extends, so no further two's complement fixup is needed;
+	// as_int(false) hands back the raw bit pattern, so a 32 bit unsigned constant with
+	// its MSB set has to be widened through unsigned int to stay positive.
+	auto const_value = [](const SigSpec &sig, bool is_signed) -> long long {
+		int raw = sig.as_int(is_signed);
+		if (is_signed)
+			return raw;
+		return static_cast<unsigned int>(raw);
+	};
+	long long b_value = const_value(b_const, b_const_signed);
+	long long c_value = const_value(c_const, c_const_signed);
+
+	// Avoid division by zero
+	if (c_value == 0)
+		reject;
+
+	// Scale b by the zero padding below the product.
+	// Multiplying instead of shifting keeps negative values well defined. c is non-zero,
+	// hence at least one bit wide, so offset is at most 31 and the result fits in long long.
+	long long b_shifted_value = b_value * (1LL << offset);
+
+	// Check that b is divisible by c
+	if (b_shifted_value % c_value != 0)
+		reject;
+	long long ratio_value = b_shifted_value / c_value;
+
+	// Both values are handed to Const as int, so they have to survive the narrowing unchanged
+	int b_const_int_shifted = static_cast<int>(b_shifted_value);
+	int const_ratio_int = static_cast<int>(ratio_value);
+	if (b_const_int_shifted != b_shifted_value || const_ratio_int != ratio_value)
+		reject;
+
+	// Calculate the constants and compress the width to fit the value
+	Const b_const_actual;
+	b_const_actual = b_const_int_shifted;
+	b_const_actual.compress(b_const_signed);
+
+	Const const_ratio;
+	const_ratio = const_ratio_int;
+	const_ratio.compress(b_const_signed);
+
+	// Check for potential multiplier overflow
+	if (GetSize(b_const_actual) + GetSize(a) > GetSize(mul_y))
+		reject;
+
+	// Rewire to only keep multiplier
+	mul->setPort(\A, a);
+	mul->setPort(\B, const_ratio);
+	mul->setPort(\Y, div_y);
+
+	// Remove divider
+	autoremove(div);
+
+	// Log, fixup, accept
+	log("muldiv_const pattern in %s: mul=%s, div=%s\n", log_id(module), log_id(mul), log_id(div));
+	mul->fixup_parameters();
+	accept;
+}
+endcode
diff --git a/tests/peepopt/.gitignore b/tests/peepopt/.gitignore
new file mode 100644
index 000000000..50e13221d
--- /dev/null
+++ b/tests/peepopt/.gitignore
@@ -0,0 +1 @@
+/*.log
diff --git a/tests/peepopt/muldiv_c.ys b/tests/peepopt/muldiv_c.ys
new file mode 100644
index 000000000..62777caab
--- /dev/null
+++ b/tests/peepopt/muldiv_c.ys
@@ -0,0 +1,343 @@
+log -header "Test simple positive case"
+log -push
+design -reset
+read_verilog <