From 72c6a01e6743d21937cf407abe19a75850f8a9d4 Mon Sep 17 00:00:00 2001 From: Philippe Sauter Date: Mon, 6 Nov 2023 14:01:37 +0100 Subject: [PATCH 1/5] peepopt: Add initial `shiftadd` pattern --- passes/pmgen/Makefile.inc | 1 + passes/pmgen/peepopt.cc | 1 + passes/pmgen/peepopt_shiftadd.pmg | 116 ++++++++++++++++++++++++++++++ 3 files changed, 118 insertions(+) create mode 100644 passes/pmgen/peepopt_shiftadd.pmg diff --git a/passes/pmgen/Makefile.inc b/passes/pmgen/Makefile.inc index c2257b720..884e12522 100644 --- a/passes/pmgen/Makefile.inc +++ b/passes/pmgen/Makefile.inc @@ -44,6 +44,7 @@ $(eval $(call add_extra_objs,passes/pmgen/peepopt_pm.h)) PEEPOPT_PATTERN = passes/pmgen/peepopt_shiftmul_right.pmg PEEPOPT_PATTERN += passes/pmgen/peepopt_shiftmul_left.pmg +PEEPOPT_PATTERN += passes/pmgen/peepopt_shiftadd.pmg PEEPOPT_PATTERN += passes/pmgen/peepopt_muldiv.pmg passes/pmgen/peepopt_pm.h: passes/pmgen/pmgen.py $(PEEPOPT_PATTERN) diff --git a/passes/pmgen/peepopt.cc b/passes/pmgen/peepopt.cc index aef464d79..b4394dfd4 100644 --- a/passes/pmgen/peepopt.cc +++ b/passes/pmgen/peepopt.cc @@ -72,6 +72,7 @@ struct PeepoptPass : public Pass { pm.setup(module->selected_cells()); + pm.run_shiftadd(); pm.run_shiftmul_right(); pm.run_shiftmul_left(); pm.run_muldiv(); diff --git a/passes/pmgen/peepopt_shiftadd.pmg b/passes/pmgen/peepopt_shiftadd.pmg new file mode 100644 index 000000000..172310499 --- /dev/null +++ b/passes/pmgen/peepopt_shiftadd.pmg @@ -0,0 +1,116 @@ +pattern shiftadd +// +// Transforms add/sub+shift pairs that result from expressions such as data[s*W +C +:W2] +// specifically something like: out[W2-1:0] = data >> (s*W +C) +// will be transformed into: out[W2-1:0] = (data >> C) >> (s*W) +// this can then be optimized using peepopt_shiftmul_right.pmg +// + +match shift + select shift->type.in($shift, $shiftx, $shr) + filter !port(shift, \B).empty() +endmatch + +// the right shift amount +state shift_amount +// log2 scale factor in interpreting of shift_amount +// due to zero padding on the shift cell's B port +state log2scale +// zeros at the MSB position make it unsigned +state msb_zeros + +code shift_amount log2scale msb_zeros + shift_amount = port(shift, \B); + + log2scale = 0; + while (shift_amount[0] == State::S0) { + shift_amount.remove(0); + if (shift_amount.empty()) reject; + log2scale++; + } + + msb_zeros = 0; + while (shift_amount.bits().back() == State::S0) { + msb_zeros = true; + shift_amount.remove(GetSize(shift_amount) - 1); + if (shift_amount.empty()) reject; + } + + if (GetSize(shift_amount) > 20) + reject; +endcode + +state add_var +state add_const +state is_sub +state varport_A + +match add + // either data[var+c +:W1] or data[var-c +:W1] + select add->type.in($add, $sub) + index port(add, \Y) === shift_amount + + // one must be constant, the other is variable + choice constport {\A, \B} + filter port(add, constport).is_fully_const() + define varport (constport == \A ? \B : \A) + + set is_sub add->type.in($sub) + set varport_A (varport == \A) + + // (var+c)< (var<getPort(varport) +endmatch + +code +{ + log_debug("shiftadd candidate in %s: shift=%s, add/sub=%s\n", log_id(module), log_id(shift), log_id(add)); + if (add_const.empty() || GetSize(add_const) > 20) + reject; + + int offset = add_const.as_int() * ( (is_sub && varport_A) ? -1 : 1 ); + bool varport_signed = (varport_A && param(add, \A_SIGNED).as_bool()) \ + || (!varport_A && param(add, \B_SIGNED).as_bool()); + + // data[...-c +:W1] is fine for +/-var (pad at LSB, all data still accessible) + // data[...+c +:W1] is only fine for +var(add) and var unsigned + // (+c cuts lower C bits, making them inaccessible, a signed var could try to access them) + // -> data[c-var +:W1] is illegal + if (is_sub && !varport_A) + reject; + // -> data[var+c +:W1] (with var signed) is illegal + if ( (offset>0) && varport_signed ) + reject; + + did_something = true; + log("shiftadd pattern in %s: shift=%s, add/sub=%s, offset: %d\n", \ + log_id(module), log_id(shift), log_id(add), offset); + + SigSpec old_a = port(shift, \A), new_a; + if(offset<0) { + // data >> (...-c) transformed to {data, c'X} >> (...) + SigSpec padding( (shift->type.in($shiftx) ? State::Sx : State::S0), -offset ); + new_a.append(padding); + new_a.append(old_a); + } else { + // data >> (...+c) transformed to data[MAX:c] >> (...) + new_a.append(old_a.extract(offset, GetSize(old_a)-1-offset)); + + } + + SigSpec new_b = {add_var, SigSpec(State::S0, log2scale)}; + if (msb_zeros || !varport_signed) + new_b.append(State::S0); + + shift->setPort(\A, new_a); + shift->setParam(\A_WIDTH, GetSize(new_a)); + shift->setPort(\B, new_b); + shift->setParam(\B_WIDTH, GetSize(new_b)); + blacklist(add); + accept; +} +endcode From 9ca57d9f1312ccd8cd889031805afdb434ca46f2 Mon Sep 17 00:00:00 2001 From: phsauter Date: Mon, 6 Nov 2023 14:01:37 +0100 Subject: [PATCH 2/5] peepopt: fix and refactor `shiftadd` - moved all selection and filtering logic to the match block - applied less-verbose code suggestions - removed constraint on number of bits in shift-amount - added check for possible wrap-arround in the operation --- passes/pmgen/peepopt_shiftadd.pmg | 72 ++++++++++++++++--------------- 1 file changed, 38 insertions(+), 34 deletions(-) diff --git a/passes/pmgen/peepopt_shiftadd.pmg b/passes/pmgen/peepopt_shiftadd.pmg index 172310499..25f0756da 100644 --- a/passes/pmgen/peepopt_shiftadd.pmg +++ b/passes/pmgen/peepopt_shiftadd.pmg @@ -35,15 +35,12 @@ code shift_amount log2scale msb_zeros shift_amount.remove(GetSize(shift_amount) - 1); if (shift_amount.empty()) reject; } - - if (GetSize(shift_amount) > 20) - reject; endcode -state add_var -state add_const -state is_sub -state varport_A +state var_signed +state var_signal +// offset: signed constant value c in data[var+c +:W1] (constant shift-right amount) +state offset match add // either data[var+c +:W1] or data[var-c +:W1] @@ -52,39 +49,46 @@ match add // one must be constant, the other is variable choice constport {\A, \B} - filter port(add, constport).is_fully_const() + select !port(add, constport).empty() + select port(add, constport).is_fully_const() define varport (constport == \A ? \B : \A) - set is_sub add->type.in($sub) - set varport_A (varport == \A) + // if a value of var is able to wrap the output, the transformation might give wrong results + // an addition/substraction can at most flip one more bit than the largest operand (the carry bit) + // as long as the output can show this bit, no wrap should occur (assuming all signed-ness make sense) + select ( GetSize(port(add, \Y)) > max(GetSize(port(add, \A)), GetSize(port(add, \B))) ) - // (var+c)< (var< varport_A (varport == \A) + define is_sub add->type.in($sub) - // get add_var unmapped (so no `port()` shorthand) - // to attach it to the transformed shift cells \B port - set add_var add->getPort(varport) + define constport_signed param(add, !varport_A ? \A_SIGNED : \B_SIGNED).as_bool() + define varport_signed param(add, varport_A ? \A_SIGNED : \B_SIGNED).as_bool(); + define offset_negative ((port(add, constport).bits().back() == State::S1) ^ (is_sub && varport_A)) + + // checking some value boundaries as well: + // data[...-c +:W1] is fine for +/-var (pad at LSB, all data still accessible) + // data[...+c +:W1] is only fine for +var(add) and var unsigned + // (+c cuts lower C bits, making them inaccessible, a signed var could try to access them) + // either its an add or the variable port is A (it must be positive) + select (add->type.in($add) || varport == \A) + + // -> data[var+c +:W1] (with var signed) is illegal + filter !(!offset_negative && varport_signed) + + // state-variables are assigned at the end only: + // shift the log2scale offset in-front of add to get true value: (var+c)< (var<getPort(varport) endmatch code { - log_debug("shiftadd candidate in %s: shift=%s, add/sub=%s\n", log_id(module), log_id(shift), log_id(add)); - if (add_const.empty() || GetSize(add_const) > 20) - reject; - - int offset = add_const.as_int() * ( (is_sub && varport_A) ? -1 : 1 ); - bool varport_signed = (varport_A && param(add, \A_SIGNED).as_bool()) \ - || (!varport_A && param(add, \B_SIGNED).as_bool()); - - // data[...-c +:W1] is fine for +/-var (pad at LSB, all data still accessible) - // data[...+c +:W1] is only fine for +var(add) and var unsigned - // (+c cuts lower C bits, making them inaccessible, a signed var could try to access them) - // -> data[c-var +:W1] is illegal - if (is_sub && !varport_A) - reject; - // -> data[var+c +:W1] (with var signed) is illegal - if ( (offset>0) && varport_signed ) + if (offset>0 && var_signed) { + log("I should not be here %x\n", var_signed); reject; + } did_something = true; log("shiftadd pattern in %s: shift=%s, add/sub=%s, offset: %d\n", \ @@ -98,12 +102,12 @@ code new_a.append(old_a); } else { // data >> (...+c) transformed to data[MAX:c] >> (...) - new_a.append(old_a.extract(offset, GetSize(old_a)-1-offset)); + new_a.append(old_a.extract_end(offset)); } - SigSpec new_b = {add_var, SigSpec(State::S0, log2scale)}; - if (msb_zeros || !varport_signed) + SigSpec new_b = {var_signal, SigSpec(State::S0, log2scale)}; + if (msb_zeros || !var_signed) new_b.append(State::S0); shift->setPort(\A, new_a); From b6df900bcc97beb190207f69475fdfada1d94229 Mon Sep 17 00:00:00 2001 From: Philippe Sauter Date: Mon, 6 Nov 2023 14:01:37 +0100 Subject: [PATCH 3/5] peepopt: Describe `shiftadd` rule in help message --- passes/pmgen/peepopt.cc | 3 +++ 1 file changed, 3 insertions(+) diff --git a/passes/pmgen/peepopt.cc b/passes/pmgen/peepopt.cc index b4394dfd4..edd3b18a8 100644 --- a/passes/pmgen/peepopt.cc +++ b/passes/pmgen/peepopt.cc @@ -48,6 +48,9 @@ struct PeepoptPass : public Pass { log(" Analogously, replace A<<(B*C) with appropriate selection of\n"); log(" output bits from A<<(B<>(B+D) with (A'>>D)>>(B) where D is constant and\n"); + log(" A' is derived from A by padding or cutting inaccessible bits.\n"); + log("\n"); } void execute(std::vector args, RTLIL::Design *design) override { From c3b8de54dae39a67046875dbc540da59bc45d342 Mon Sep 17 00:00:00 2001 From: phsauter Date: Mon, 6 Nov 2023 14:01:37 +0100 Subject: [PATCH 4/5] test: add tests for `shiftadd` and `shiftmul` This expands the part-select tests with one additional module. It specifically tests the different variants of the `peepopt` optimizations `shiftadd` and `shiftmul`. Not all these cases are actually transformed using `shiftadd`, including them also checks if the correct variants are rejected. --- tests/simple/partsel.v | 39 +++++++++++++++++++++++++++++++++++++++ 1 file changed, 39 insertions(+) diff --git a/tests/simple/partsel.v b/tests/simple/partsel.v index 5e9730d6b..722ed7b1b 100644 --- a/tests/simple/partsel.v +++ b/tests/simple/partsel.v @@ -110,3 +110,42 @@ module partsel_test007 ( dout[n+1] = din[n]; end endmodule + + +module partsel_test008 ( + input [127:0] din, + input [3:0] idx, + input [4:0] uoffset, + input signed [4:0] soffset, + output [ 7:0] dout0, + output [ 7:0] dout1, + output [ 7:0] dout2, + output [ 7:0] dout3, + output [ 3:0] dout4, + output [ 3:0] dout5, + output [ 3:0] dout6, + output [ 3:0] dout7, + output [ 3:0] dout8, + output [11:0] dout9, + output [11:0] dout10, + output [11:0] dout11 +); + +// common: block-select with offsets +assign dout0 = din[idx*8 +uoffset +:8]; +assign dout1 = din[idx*8 -uoffset +:8]; +assign dout2 = din[idx*8 +soffset +:8]; +assign dout3 = din[idx*8 -soffset +:8]; + +// only partial block used +assign dout4 = din[idx*8 +uoffset +:4]; +assign dout5 = din[idx*8 -uoffset +:4]; +assign dout6 = din[idx*8 +soffset +:4]; +assign dout7 = din[idx*8 -soffset +:4]; + +// uncommon: more than one block used +assign dout8 = din[idx*8 +uoffset +:12]; +assign dout9 = din[idx*8 -uoffset +:12]; +assign dout10 = din[idx*8 +soffset +:12]; +assign dout11 = din[idx*8 -soffset +:12]; +endmodule From 3618294bacbb5b3a2514e5ef69effb0f82c321af Mon Sep 17 00:00:00 2001 From: phsauter Date: Mon, 6 Nov 2023 16:35:00 +0100 Subject: [PATCH 5/5] peepopt: Add assert of consistent `shiftadd` data --- passes/pmgen/peepopt_shiftadd.pmg | 9 +++++---- 1 file changed, 5 insertions(+), 4 deletions(-) diff --git a/passes/pmgen/peepopt_shiftadd.pmg b/passes/pmgen/peepopt_shiftadd.pmg index 25f0756da..f9c930eae 100644 --- a/passes/pmgen/peepopt_shiftadd.pmg +++ b/passes/pmgen/peepopt_shiftadd.pmg @@ -85,10 +85,11 @@ endmatch code { - if (offset>0 && var_signed) { - log("I should not be here %x\n", var_signed); - reject; - } + // positive constant offset with a signed variable (index) cannot be handled + // the above filter should get rid of this case but 'offset' is calculated differently + // due to limitations of state-variables in pmgen + // it should only differ if previous passes create invalid data + log_assert(!(offset>0 && var_signed)); did_something = true; log("shiftadd pattern in %s: shift=%s, add/sub=%s, offset: %d\n", \