diff --git a/passes/opt/Makefile.inc b/passes/opt/Makefile.inc index e7b62fc6a..4a22c2301 100644 --- a/passes/opt/Makefile.inc +++ b/passes/opt/Makefile.inc @@ -32,6 +32,7 @@ $(eval $(call add_extra_objs,passes/opt/peepopt_pm.h)) PEEPOPT_PATTERN = passes/opt/peepopt_shiftmul_right.pmg PEEPOPT_PATTERN += passes/opt/peepopt_shiftmul_left.pmg PEEPOPT_PATTERN += passes/opt/peepopt_shiftadd.pmg +PEEPOPT_PATTERN += passes/opt/peepopt_shiftpow2.pmg PEEPOPT_PATTERN += passes/opt/peepopt_muldiv.pmg PEEPOPT_PATTERN += passes/opt/peepopt_muldiv_c.pmg PEEPOPT_PATTERN += passes/opt/peepopt_formal_clockgateff.pmg diff --git a/passes/opt/peepopt.cc b/passes/opt/peepopt.cc index fa7cf74a0..ac5b571f7 100644 --- a/passes/opt/peepopt.cc +++ b/passes/opt/peepopt.cc @@ -68,6 +68,12 @@ struct PeepoptPass : public Pass { log(" limits the amount of padding to a multiple of the data, \n"); log(" to avoid high resource usage from large temporary MUX trees.\n"); log("\n"); + log(" * shiftpow2 - Replace A>>(B<type.in($shift, $shiftx, $shr) + filter !port(shift, \B).empty() +endmatch + +code +{ + // make sure the shift amount cannot be negative + SigSpec amount = port(shift, \B); + bool b_signed = shift->type.in($shift, $shiftx) && param(shift, \B_SIGNED).as_bool(); + if (!b_signed) + amount.append(State::S0); + if (amount.bits().back() != State::S0) + reject; + + while (GetSize(amount) > 1 && amount.bits().back() == State::S0) + amount.remove(GetSize(amount) - 1); + + // low zero bits encode the power-of-two scale + int log2scale = 0; + while (!amount.empty() && amount[0] == State::S0) { + amount.remove(0); + log2scale++; + } + + if (log2scale < 1) + reject; + + if (amount.empty() || amount.is_fully_const()) + reject; + + SigSpec sel = amount; + int sel_width = GetSize(sel); + int width = param(shift, \Y_WIDTH).as_int(); + if (log2scale >= 8 * (int)sizeof(int) - 1) + reject; + int stride = 1 << log2scale; + + // avoid overlapping selections + if (width > stride) + reject; + + if (sel_width > 20) + reject; + long long ways = 1LL << sel_width; + + SigSpec A = port(shift, \A); + int a_width = GetSize(A); + bool a_signed = !shift->type.in($shiftx) && param(shift, \A_SIGNED).as_bool(); + int extended_a_width = a_signed ? std::max(a_width, width) : a_width; + + // limit padding for out-of-range select values + int max_ratio = module->design->scratchpad_get_int("peepopt.shiftpow2.max_data_multiple", 2); + if (ways * (long long)width > (long long)max_ratio * std::max(a_width, width)) + reject; + + did_something = true; + log("shiftpow2 pattern in %s: shift=%s, index=%s, stride=%d, width=%d, ways=%lld\n", + module, shift, log_signal(sel), stride, width, ways); + + // way m holds A[m*stride +: width], way 0 in the LSBs + State fill = shift->type.in($shiftx) ? State::Sx : State::S0; + SigSpec bmux_a; + for (long long m = 0; m < ways; m++) { + long long base = m * (long long)stride; + for (int b = 0; b < width; b++) { + long long idx = base + b; + if (idx < a_width) + bmux_a.append(A[idx]); + else if (idx < extended_a_width) + bmux_a.append(A.back()); + else + bmux_a.append(fill); + } + } + + module->addBmux(NEW_ID, bmux_a, sel, port(shift, \Y)); + autoremove(shift); + accept; +} +endcode diff --git a/tests/various/peepopt.ys b/tests/various/peepopt.ys index cbbd477e8..e0b9946cf 100644 --- a/tests/various/peepopt.ys +++ b/tests/various/peepopt.ys @@ -8,8 +8,8 @@ prep -nokeepdc equiv_opt -assert peepopt design -load postopt clean -select -assert-count 1 t:$shiftx -select -assert-count 0 t:$shiftx t:* %D +select -assert-count 1 t:$bmux +select -assert-count 0 t:$bmux t:* %D #################### @@ -72,9 +72,10 @@ design -import gate -as gate peepopt_shiftmul_3 miter -equiv -make_assert -make_outputs -ignore_gold_x -flatten gold gate miter sat -verify -show-public -enable_undef -prove-asserts miter cd gate -select -assert-count 1 t:$shr -select -assert-count 1 t:$mul -select -assert-count 0 t:$shr t:$mul %% t:* %D +clean +select -assert-count 1 t:$bmux +select -assert-count 0 t:$shr +select -assert-count 0 t:$mul #################### @@ -92,3 +93,155 @@ equiv_opt -assert peepopt design -load postopt clean select -assert-count 0 t:* + +#################### + +# shiftpow2: a power-of-two part-select i[s*W+:W] becomes a $bmux word mux +design -reset +read_verilog <> (S*8), checked by SAT miter +design -reset +read_verilog <> (S*8); +endmodule +EOT + +prep +design -save gold +peepopt +design -stash gate + +design -import gold -as gold peepopt_shiftpow2_1 +design -import gate -as gate peepopt_shiftpow2_1 + +miter -equiv -make_assert -make_outputs -ignore_gold_x -flatten gold gate miter +sat -verify -show-public -enable_undef -prove-asserts miter +cd gate +clean +select -assert-count 1 t:$bmux +select -assert-count 0 t:$shr + +#################### + +# shiftpow2: width smaller than stride is non-overlapping +design -reset +read_verilog <> (S*8); +endmodule +EOT + +prep +design -save gold +peepopt +design -stash gate + +design -import gold -as gold peepopt_shiftpow2_narrow +design -import gate -as gate peepopt_shiftpow2_narrow + +miter -equiv -make_assert -make_outputs -ignore_gold_x -flatten gold gate miter +sat -verify -show-public -enable_undef -prove-asserts miter +cd gate +clean +select -assert-count 1 t:$bmux +select -assert-count 0 t:$shr + +#################### + +# shiftpow2: signed part-select with out-of-range padding +design -reset +read_verilog <> (S*8); +endmodule +EOT + +prep +design -save gold +peepopt +design -stash gate + +design -import gold -as gold peepopt_shiftpow2_signed_shr +design -import gate -as gate peepopt_shiftpow2_signed_shr + +miter -equiv -make_assert -make_outputs -flatten gold gate miter +sat -verify -show-public -prove-asserts miter +cd gate +clean +select -assert-count 1 t:$bmux +select -assert-count 0 t:$shr + +#################### + +# shiftpow2 must NOT fire for overlapping selections +design -reset +read_verilog <> (S*4); +endmodule +EOT + +prep -nokeepdc +peepopt +clean +select -assert-count 0 t:$bmux +select -assert-count 1 t:$shr + +#################### + +# shiftpow2: shiftmul can expose a non-overlapping power-of-two stride +design -reset +read_verilog <