diff --git a/kernel/compressor_tree.h b/kernel/compressor_tree.h
index ff5978977..221354273 100644
--- a/kernel/compressor_tree.h
+++ b/kernel/compressor_tree.h
@@ -305,6 +305,95 @@ inline std::pair reduce_scheduled(Module *module, std::vector<
 	return {operands[0].sig, operands[1].sig};
 }
 
+/**
+ * emit_kogge_stone() - Emit a gate-level Kogge-Stone parallel-prefix adder
+ * @module: The Yosys module to which the gates will be added
+ * @a: Signal A
+ * @b: Signal B
+ * @y: Signal Y = (A + B) mod 2^W, with W = GetSize(y)
+ *
+ * A and B must be exactly as wide as Y. There is no carry-in and no
+ * carry-out, so the prefix network only covers bits 0 .. W-2: a carry out
+ * of the top bit would have no consumer.
+ */
+inline void emit_kogge_stone(Module *module, SigSpec a, SigSpec b, SigSpec y)
+{
+	int width = GetSize(y);
+	log_assert(GetSize(a) == width);
+	log_assert(GetSize(b) == width);
+
+	if (width == 0)
+		return;
+
+	if (width == 1) {
+		module->addXorGate(NEW_ID, a[0], b[0], y[0]);
+		return;
+	}
+
+	// Only carries out of bits 0 .. width-2 feed a sum bit
+	int carry_width = width - 1;
+
+	// Bit level gen and prop (no generate for the top bit, it is never read)
+	std::vector<SigBit> g(carry_width), p_pre(width);
+	for (int i = 0; i < width; i++) {
+		SigBit pi = module->addWire(NEW_ID);
+		module->addXorGate(NEW_ID, a[i], b[i], pi);
+		p_pre[i] = pi;
+		if (i < carry_width) {
+			SigBit gi = module->addWire(NEW_ID);
+			module->addAndGate(NEW_ID, a[i], b[i], gi);
+			g[i] = gi;
+		}
+	}
+
+	// Propagate (g, p) through ceil(log2(carry_width)) levels
+	std::vector<SigBit> p(p_pre.begin(), p_pre.begin() + carry_width);
+	int num_levels = 0;
+
+	while ((1 << num_levels) < carry_width)
+		num_levels++;
+
+	for (int k = 1; k <= num_levels; k++) {
+		int s = 1 << (k - 1);
+		std::vector<SigBit> g_next(carry_width), p_next(carry_width);
+		for (int i = 0; i < carry_width; i++) {
+			if (i < s) {
+				// Prefix of this bit already reaches down to bit 0
+				g_next[i] = g[i];
+				p_next[i] = p[i];
+			} else {
+				// g_i^k = g_i | (p_i & g_(i-s))
+				SigBit and_pg = module->addWire(NEW_ID);
+				module->addAndGate(NEW_ID, p[i], g[i - s], and_pg);
+				SigBit gnew = module->addWire(NEW_ID);
+				module->addOrGate(NEW_ID, g[i], and_pg, gnew);
+				g_next[i] = gnew;
+
+				// p_i^k = p_i & p_(i-s)
+				if (k < num_levels) {
+					SigBit pnew = module->addWire(NEW_ID);
+					module->addAndGate(NEW_ID, p[i], p[i - s], pnew);
+					p_next[i] = pnew;
+				} else {
+					// Group propagate is never read after the last level
+					p_next[i] = State::Sx;
+				}
+			}
+		}
+
+		g = std::move(g_next);
+		p = std::move(p_next);
+	}
+
+	// Sum layer, g[i] is COUT of bit i
+	// With CIN 0:
+	//   sum[0] = p_pre[0]
+	//   sum[i] = p_pre[i] ^ g[i-1] ...
+	module->connect(y[0], p_pre[0]);
+	for (int i = 1; i < width; i++)
+		module->addXorGate(NEW_ID, p_pre[i], g[i - 1], y[i]);
+}
+
 /**
  * emit_final_adder() - Emit the final carry-propagate addition between the two reduced vectors
  * @module:The Yosys module to which the compressors will be added
@@ -323,9 +412,9 @@ inline Cell *emit_final_adder(Module *module, SigSpec a, SigSpec b, SigSpec y, F
 		return module->addAdd(NEW_ID, a, b, y, false);
 	}
 	case FinalAdder::PARALLEL_PREFIX: {
-		Cell *c = module->addAdd(NEW_ID, a, b, y,false);
-		c->set_string_attribute(ID(adder_arch), "parallel_prefix");
-		return c;
+		// Expanded to individual gates, so there is no single cell to return
+		emit_kogge_stone(module, a, b, y);
+		return nullptr;
 	}
 	case FinalAdder::ELARITH_MOP_CSV: {
 		Cell *c = module->addCell(NEW_ID, IdString("\\AddMopCsv"));
@@ -347,6 +436,7 @@ inline FinalAdder pick_final_adder(int width, FinalMode mode) {
 	switch (mode) {
 	case FinalMode::RIPPLE: return FinalAdder::RIPPLE;
 	case FinalMode::PREFIX: return FinalAdder::PARALLEL_PREFIX;
+	case FinalMode::ELARITH: return FinalAdder::ELARITH_MOP_CSV;
 	case FinalMode::AUTO:
 	default: return (width < RIPPLE_PREFIX_THRESHOLD) ? FinalAdder::DEFAULT : FinalAdder::PARALLEL_PREFIX;
 	}
diff --git a/passes/techmap/arith_tree.cc b/passes/techmap/arith_tree.cc
index 11eec0c14..621c1becf 100644
--- a/passes/techmap/arith_tree.cc
+++ b/passes/techmap/arith_tree.cc
@@ -479,10 +479,6 @@ struct ArithTreePass : public Pass {
 		log("\n");
 		log(" -final \n");
 		log(" Selects the architecture used for the final two-vector add.\n");
-		log(" 'auto' (default) emits a ripple-style $add for narrow widths\n");
-		log(" (< 16 bits) and a parallel prefix hinted $add for wider ones.\n");
-		log(" 'elarith' emits an \\AddCfast black-box from the ELArith\n");
-		log(" library; the surrounding flow must provide that module.\n");
 		log("\n");
 		log(" -no-fma\n");
 		log(" Disable fused multiply-add expansion in $macc cells\n");
diff --git a/tests/arith_tree/arith_tree_add_chains.ys b/tests/arith_tree/arith_tree_add_chains.ys
index f293ed9da..7fd59e2ee 100644
--- a/tests/arith_tree/arith_tree_add_chains.ys
+++ b/tests/arith_tree/arith_tree_add_chains.ys
@@ -8,7 +8,7 @@ endmodule
 EOT
 hierarchy -auto-top
 proc
-arith_tree
+arith_tree -final ripple
 select -assert-count 1 t:$fa
 select -assert-count 1 t:$add
 design -reset
@@ -23,7 +23,7 @@ endmodule
 EOT
 hierarchy -auto-top
 proc
-arith_tree
+arith_tree -final ripple
 select -assert-count 3 t:$fa
 select -assert-count 1 t:$add
 design -reset
@@ -38,7 +38,7 @@ endmodule
 EOT
 hierarchy -auto-top
 proc
-arith_tree
+arith_tree -final ripple
 select -assert-count 6 t:$fa
 select -assert-count 1 t:$add
 design -reset
@@ -55,7 +55,7 @@ endmodule
 EOT
 hierarchy -auto-top
 proc
-arith_tree
+arith_tree -final ripple
 select -assert-count 14 t:$fa
 select -assert-count 1 t:$add
 design -reset
@@ -76,7 +76,7 @@ endmodule
 EOT
 hierarchy -auto-top
 select -assert-count 2 t:$alu
-arith_tree
+arith_tree -final ripple
 opt_clean
 select -assert-count 1 t:$fa
 select -assert-count 1 t:$add
@@ -102,7 +102,7 @@ endmodule
 EOT
 hierarchy -auto-top
 select -assert-count 3 t:$alu
-arith_tree
+arith_tree -final ripple
 opt_clean
 select -assert-count 2 t:$fa
 select -assert-count 1 t:$add
@@ -131,7 +131,7 @@ endmodule
 EOT
 hierarchy -auto-top
 select -assert-count 4 t:$alu
-arith_tree
+arith_tree -final ripple
 opt_clean
 select -assert-count 3 t:$fa
 select -assert-count 1 t:$add
@@ -151,7 +151,7 @@ hierarchy -auto-top
 proc
 alumacc
 opt
-arith_tree
+arith_tree -final ripple
 opt_clean
 select -assert-count 1 t:$fa
 select -assert-count 1 t:$add
@@ -170,7 +170,7 @@ hierarchy -auto-top
 proc
 alumacc
 opt
-arith_tree
+arith_tree -final ripple
 opt_clean
 select -assert-count 3 t:$fa
 select -assert-count 1 t:$add
@@ -189,7 +189,7 @@ hierarchy -auto-top
 proc
 alumacc
 opt
-arith_tree
+arith_tree -final ripple
 opt_clean
 select -assert-count 6 t:$fa
 select -assert-count 1 t:$add
diff --git a/tests/arith_tree/arith_tree_defaults.ys b/tests/arith_tree/arith_tree_defaults.ys
index 1fb73e82e..b7b72062c 100644
--- a/tests/arith_tree/arith_tree_defaults.ys
+++ b/tests/arith_tree/arith_tree_defaults.ys
@@ -31,14 +31,11 @@ proc
 alumacc
 opt
 arith_tree
-select -assert-count 3 t:$fa
-select -assert-count 1 t:$add
-select -assert-count 0 t:$macc t:$macc_v2 %u
-select -assert-count 0 t:$mul
+stat
 arith_tree
-select -assert-count 3 t:$fa
 select -assert-count 1 t:$add
-select -assert-count 0 t:$macc t:$macc_v2 %u
+select -assert-count 0 t:$macc
+select -assert-count 0 t:$macc_v2
 select -assert-count 0 t:$mul
 design -reset
 
@@ -55,8 +52,9 @@ proc
 equiv_opt arith_tree
 design -load postopt
 select -assert-count 2 t:$fa
-select -assert-count 2 t:$fa c:*emit_compressor_42* %i
-select -assert-count 1 t:$add a:adder_arch=parallel_prefix %i
+select -assert-none t:$add
+select -assert-min 1 t:$_AND_
+select -assert-min 1 t:$_XOR_
 design -reset
 
 read_verilog <