diff --git a/techlibs/quicklogic/Makefile.inc b/techlibs/quicklogic/Makefile.inc index a54a7ec03..40f6af3d3 100644 --- a/techlibs/quicklogic/Makefile.inc +++ b/techlibs/quicklogic/Makefile.inc @@ -11,9 +11,12 @@ OBJS += techlibs/quicklogic/ql_ioff.o # -------------------------------------- OBJS += techlibs/quicklogic/ql_dsp_macc.o -GENFILES += techlibs/quicklogic/ql_dsp_macc_pm.h techlibs/quicklogic/qlf_k6n10f/bram_types_sim.v +OBJS += techlibs/quicklogic/ql_dsp.o +GENFILES += techlibs/quicklogic/ql_dsp_macc_pm.h techlibs/quicklogic/ql_dsp_pm.h techlibs/quicklogic/qlf_k6n10f/bram_types_sim.v techlibs/quicklogic/ql_dsp_macc.o: techlibs/quicklogic/ql_dsp_macc_pm.h +techlibs/quicklogic/ql_dsp.o: techlibs/quicklogic/ql_dsp_pm.h $(eval $(call add_extra_objs,techlibs/quicklogic/ql_dsp_macc_pm.h)) +$(eval $(call add_extra_objs,techlibs/quicklogic/ql_dsp_pm.h)) # -------------------------------------- @@ -36,9 +39,13 @@ $(eval $(call add_share_file,share/quicklogic/qlf_k6n10f,techlibs/quicklogic/qlf $(eval $(call add_gen_share_file,share/quicklogic/qlf_k6n10f,techlibs/quicklogic/qlf_k6n10f/bram_types_sim.v)) $(eval $(call add_share_file,share/quicklogic/qlf_k6n10f,techlibs/quicklogic/qlf_k6n10f/cells_sim.v)) $(eval $(call add_share_file,share/quicklogic/qlf_k6n10f,techlibs/quicklogic/qlf_k6n10f/ffs_map.v)) -$(eval $(call add_share_file,share/quicklogic/qlf_k6n10f,techlibs/quicklogic/qlf_k6n10f/dsp_sim.v)) -$(eval $(call add_share_file,share/quicklogic/qlf_k6n10f,techlibs/quicklogic/qlf_k6n10f/dsp_map.v)) -$(eval $(call add_share_file,share/quicklogic/qlf_k6n10f,techlibs/quicklogic/qlf_k6n10f/dsp_final_map.v)) +$(eval $(call add_share_file,share/quicklogic/qlf_k6n10f,techlibs/quicklogic/qlf_k6n10f/dspv1_sim.v)) +$(eval $(call add_share_file,share/quicklogic/qlf_k6n10f,techlibs/quicklogic/qlf_k6n10f/dspv1_sim_extra.v)) +$(eval $(call add_share_file,share/quicklogic/qlf_k6n10f,techlibs/quicklogic/qlf_k6n10f/dspv1_map.v)) +$(eval $(call 
add_share_file,share/quicklogic/qlf_k6n10f,techlibs/quicklogic/qlf_k6n10f/dspv1_final_map.v)) +$(eval $(call add_share_file,share/quicklogic/qlf_k6n10f,techlibs/quicklogic/qlf_k6n10f/dspv2_sim.v)) +$(eval $(call add_share_file,share/quicklogic/qlf_k6n10f,techlibs/quicklogic/qlf_k6n10f/dspv2_map.v)) +$(eval $(call add_share_file,share/quicklogic/qlf_k6n10f,techlibs/quicklogic/qlf_k6n10f/dspv2_final_map.v)) $(eval $(call add_share_file,share/quicklogic/qlf_k6n10f,techlibs/quicklogic/qlf_k6n10f/TDP18K_FIFO.v)) $(eval $(call add_share_file,share/quicklogic/qlf_k6n10f,techlibs/quicklogic/qlf_k6n10f/ufifo_ctl.v)) $(eval $(call add_share_file,share/quicklogic/qlf_k6n10f,techlibs/quicklogic/qlf_k6n10f/sram1024x18_mem.v)) diff --git a/techlibs/quicklogic/ql_dsp.cc b/techlibs/quicklogic/ql_dsp.cc new file mode 100644 index 000000000..b446a6bc6 --- /dev/null +++ b/techlibs/quicklogic/ql_dsp.cc @@ -0,0 +1,134 @@ +/* + * yosys -- Yosys Open SYnthesis Suite + * + * Permission to use, copy, modify, and/or distribute this software for any + * purpose with or without fee is hereby granted, provided that the above + * copyright notice and this permission notice appear in all copies. + * + * THE SOFTWARE IS PROVIDED "AS IS" AND THE AUTHOR DISCLAIMS ALL WARRANTIES + * WITH REGARD TO THIS SOFTWARE INCLUDING ALL IMPLIED WARRANTIES OF + * MERCHANTABILITY AND FITNESS. IN NO EVENT SHALL THE AUTHOR BE LIABLE FOR + * ANY SPECIAL, DIRECT, INDIRECT, OR CONSEQUENTIAL DAMAGES OR ANY DAMAGES + * WHATSOEVER RESULTING FROM LOSS OF USE, DATA OR PROFITS, WHETHER IN AN + * ACTION OF CONTRACT, NEGLIGENCE OR OTHER TORTIOUS ACTION, ARISING OUT OF + * OR IN CONNECTION WITH THE USE OR PERFORMANCE OF THIS SOFTWARE. 
+ *
+ */
+
+#include "kernel/rtlil.h"
+#include "kernel/register.h"
+#include "kernel/sigtools.h"
+
+PRIVATE_NAMESPACE_BEGIN
+USING_YOSYS_NAMESPACE
+
+// promote dspv2_16x9x32_cfg_ports to dspv2_32x18x64_cfg_ports if need be; returns true iff the cell type was changed, errors out if cascade inputs are in use
+bool promote(Module *m, Cell *cell) {
+	if (cell->type == ID(dspv2_32x18x64_cfg_ports)) {
+		return false;
+	} else {
+		log_assert(cell->type == ID(dspv2_16x9x32_cfg_ports));
+	}
+
+	auto uses_port = [&](IdString port_name) { // a port counts as used when present and not fully undefined
+		return cell->hasPort(port_name) && !cell->getPort(port_name).is_fully_undef();
+	};
+
+	if (uses_port(ID(a_cin_i)) || uses_port(ID(b_cin_i)) || uses_port(ID(z_cin_i))) { // validate before touching any port so a rejected cell is left unmodified
+		log_error("Cannot promote %s (type %s) with cascading paths\n", log_id(cell), log_id(cell->type));
+	}
+
+	auto widen_output = [&](IdString port_name, int new_width) { // pad a too-narrow output port with a fresh, otherwise unconnected wire
+		if (!cell->hasPort(port_name))
+			return;
+		SigSpec port = cell->getPort(port_name);
+		if (port.size() < new_width) {
+			port = {m->addWire(NEW_ID, new_width - port.size()), port};
+			cell->setPort(port_name, port);
+		}
+	};
+
+	auto widen_input = [&](IdString port_name, int new_width) { // extend a too-narrow input port via extend_u0(..., is_signed=true)
+		if (!cell->hasPort(port_name))
+			return;
+		SigSpec port = cell->getPort(port_name);
+		if (port.size() < new_width) {
+			port.extend_u0(new_width, /* is_signed= */ true);
+			cell->setPort(port_name, port);
+		}
+	};
+
+	widen_output(ID(z_o), 50);
+	widen_output(ID(a_cout_o), 32);
+	widen_output(ID(b_cout_o), 18);
+	widen_output(ID(z_cout_o), 50);
+
+	widen_input(ID(a_i), 32);
+	widen_input(ID(b_i), 18);
+	widen_input(ID(c_i), 18);
+	cell->type = ID(dspv2_32x18x64_cfg_ports);
+	return true;
+}
+
+bool did_something; // set by the pattern-matcher code blocks whenever they rewrite a cell; drives the fixed-point loop in execute()
+
+#include "techlibs/quicklogic/ql_dsp_pm.h"
+
+struct QlDspPass : Pass {
+	QlDspPass() : Pass("ql_dsp", "pack into QuickLogic DSPs") {}
+
+	void help() override
+	{
+		//   |---v---|---v---|---v---|---v---|---v---|---v---|---v---|---v---|---v---|---v---|
+		log("\n");
+		log("    ql_dsp [selection]\n");
+		log("\n");
+		log("This pass packs input and output path registers into QuickLogic DSP blocks,\n");
+		log("additionally it supports Z path cascading and post-adder packing.\n");
+		log("\n");
+		log("    -nocascade\n");
+		log("        forbid cascading\n");
+		log("\n");
+
+	}
+
+	void execute(std::vector<std::string> args, RTLIL::Design *d) override
+	{
+		log_header(d, "Executing QL_DSP pass. (pack into QuickLogic DSPs)\n");
+
+		bool nocascade = false;
+		size_t argidx;
+		for (argidx = 1; argidx < args.size(); argidx++) {
+			if (args[argidx] == "-nocascade") {
+				nocascade = true;
+				continue;
+			}
+			break;
+		}
+		extra_args(args, argidx, d);
+
+		for (auto module : d->selected_modules()) {
+			did_something = true;
+
+			while (did_something)
+			{
+				// TODO: could be optimized by more reuse of the pmgen object (each step below builds a fresh matcher; the loop repeats until no step reports a change)
+				did_something = false;
+				{
+					ql_dsp_pm pm(module, module->selected_cells());
+					pm.run_ql_dsp_pack_regs();
+				}
+				if (!nocascade) {
+					ql_dsp_pm pm(module, module->selected_cells());
+					pm.run_ql_dsp_cascade();
+				}
+				{
+					ql_dsp_pm pm(module, module->selected_cells());
+					pm.run_ql_dsp_pack_regs();
+				}
+			}
+		}
+	}
+} QlDspPass;
+
+PRIVATE_NAMESPACE_END
diff --git a/techlibs/quicklogic/ql_dsp.pmg b/techlibs/quicklogic/ql_dsp.pmg
new file mode 100644
index 000000000..e5c712957
--- /dev/null
+++ b/techlibs/quicklogic/ql_dsp.pmg
@@ -0,0 +1,272 @@
+// derived from passes/pmgen/xilinx_dsp.pmg
+pattern ql_dsp_pack_regs
+
+state <SigBit> clock reset
+state <bool> clock_inferred
+
+// Variables used for subpatterns (argQ/argD are the inputs, dff* carry the subpattern's result back; dff stays nullptr when nothing matched)
+state <SigSpec> argQ argD
+udata <SigSpec> dffD dffQ
+udata <SigBit> dffclock dffreset
+udata <Cell*> dff
+
+match dsp
+	select dsp->type.in(\dspv2_32x18x64_cfg_ports, \dspv2_16x9x32_cfg_ports)
+endmatch
+
+code clock_inferred clock reset
+	clock_inferred = false;
+	clock = port(dsp, \clock_i);
+	reset = port(dsp, \reset_i);
+endcode
+
+// try packing on Z output: only when output_select_i[2] is still 0, z_cout_o has no other user and z_o has exactly two users
+code argD clock_inferred clock reset
+	if (port(dsp, \output_select_i)[2] == RTLIL::S0 &&
+			(!dsp->hasPort(\z_cout_o) || nusers(port(dsp, \z_cout_o)) == 1) &&
+			nusers(port(dsp, \z_o)) == 2) {
+		argD = port(dsp, \z_o);
+		subpattern(out_dffe);
+		if (dff) {
+			clock_inferred = true;
+			clock = dffclock;
+			reset = dffreset;
+			log("%s: inferring Z path register from flip-flop %s\n", log_id(dsp), log_id(dff));
+			dsp->connections_[\output_select_i][2] = RTLIL::S1;
+			dsp->setPort(\z_o, dffQ);
+			did_something = true;
+		}
+	}
+endcode
+
+// try packing on B input: only when b_cout_o has no other user and B_REG is not already set
+code argQ clock_inferred clock reset
+	if ((!dsp->hasPort(\b_cout_o) || nusers(port(dsp, \b_cout_o)) == 1) &&
+			!param(dsp, \B_REG).as_bool()) {
+		argQ = port(dsp, \b_i);
+		subpattern(in_dffe);
+		if (dff) {
+			clock_inferred = true;
+			clock = dffclock;
+			reset = dffreset;
+			log("%s: inferring B path register from flip-flop %s\n", log_id(dsp), log_id(dff));
+			dsp->parameters[\B_REG] = Const(1, 1);
+			dsp->setPort(\b_i, dffD);
+			did_something = true;
+		}
+	}
+endcode
+
+// try packing on A input: only when a_cout_o has no other user and A_REG is not already set
+code argQ clock_inferred clock reset
+	if ((!dsp->hasPort(\a_cout_o) || nusers(port(dsp, \a_cout_o)) == 1) &&
+			!param(dsp, \A_REG).as_bool()) {
+		argQ = port(dsp, \a_i);
+		subpattern(in_dffe);
+		if (dff) {
+			clock_inferred = true;
+			clock = dffclock;
+			reset = dffreset;
+			log("%s: inferring A path register from flip-flop %s\n", log_id(dsp), log_id(dff));
+			dsp->parameters[\A_REG] = Const(1, 1);
+			dsp->setPort(\a_i, dffD);
+			did_something = true;
+		}
+	}
+endcode
+
+code
+	if (clock_inferred) {
+		dsp->setPort(\clock_i, clock);
+		dsp->setPort(\reset_i, reset);
+	}
+endcode
+
+// #######################
+// Subpattern for matching against input registers, based on knowledge of the
+// 'Q' output.
+subpattern in_dffe
+arg argQ clock reset
+
+code
+	dff = nullptr;
+	if (argQ.empty())
+		reject;
+	for (const auto &c : argQ.chunks()) {
+		if (!c.wire) {
+			// Abandon matches when constant Q bits are non-zero
+			// (doesn't match DSPv2 init/reset behavior); all-zero constant chunks are accepted and skipped
+			if (!SigSpec(c).is_fully_zero())
+				reject;
+			continue;
+		}
+
+		// Abandon matches when 'Q' has the keep attribute set
+		if (c.wire->get_bool_attribute(\keep))
+			reject;
+		// Abandon matches when 'Q' has a non-zero init attribute set (not supported by DSPv2); x and 0 init bits are tolerated
+		Const init = c.wire->attributes.at(\init, Const());
+		if (!init.empty())
+			for (auto b : init.extract(c.offset, c.width))
+				if (b != State::Sx && b != State::S0)
+					reject;
+	}
+endcode
+
+match ff
+	select ff->type.in($dff, $dffe, $adff, $adffe)
+	// DSPv2 does not support polarity inversion
+	select param(ff, \CLK_POLARITY).as_bool()
+
+	// Check that reset value, if present, is fully 0.
+	filter ff->type.in($dff, $dffe) || param(ff, \ARST_VALUE).is_fully_zero()
+
+	// Check reset polarity, if present
+	filter ff->type.in($dff, $dffe) || param(ff, \ARST_POLARITY).as_bool()
+
+	// Match on the LSB of argQ only. NOTE(review): the inherited wording relies on a nusers(...)=2 condition for the remaining bits, but no such check on argQ is visible in this subpattern -- confirm
+	slice offset GetSize(port(ff, \D))
+	index <SigBit> port(ff, \Q)[offset] === argQ[0]
+
+	define <SigBit> ff_reset (ff->type.in($dff, $dffe) ? RTLIL::S0 : port(ff, \ARST))
+	filter clock == RTLIL::Sx || port(ff, \CLK)[0] == clock
+	filter clock == RTLIL::Sx || ff_reset == reset
+endmatch
+
+code argD
+	dff = ff; // NOTE(review): the block header lists argD, yet only dff/dffclock/dffreset/dffD are written here -- confirm intent
+	dffclock = port(ff, \CLK);
+	dffreset = (ff->type.in($dff, $dffe) ? RTLIL::S0 : port(ff, \ARST)); // flip-flops without async reset report a constant-0 reset
+	dffD = argQ;
+	dffD.replace(port(ff, \Q), port(ff, \D)); // substitute the matched Q bits by the corresponding D bits
+endcode
+
+
+// #######################
+// Subpattern for matching against output registers, based on knowledge of the
+// 'D' input.
+ +subpattern out_dffe +arg argD clock reset + +code + dff = nullptr; + if (argD.empty()) + reject; + for (const auto &c : argD.chunks()) { + // Abandon matches when 'D' has the keep attribute set + if (!c.wire || c.wire->get_bool_attribute(\keep)) + reject; + } +endcode + +match ff + select ff->type.in($dff, $dffe, $adff, $adffe) + // DSPv2 does not support polarity inversion + select param(ff, \CLK_POLARITY).as_bool() + + // Check that reset value, if present, is fully 0. + filter ff->type.in($dff, $dffe) || param(ff, \ARST_VALUE).is_fully_zero() + + // Check reset polarity, if present + filter ff->type.in($dff, $dffe) || param(ff, \ARST_POLARITY).as_bool() + + slice offset GetSize(port(ff, \D)) + index port(ff, \D)[offset] === argD[0] + + define ff_reset (ff->type.in($dff, $dffe) ? RTLIL::S0 : port(ff, \ARST)) + filter clock == RTLIL::Sx || port(ff, \CLK)[0] == clock + filter clock == RTLIL::Sx || ff_reset == reset +endmatch + +code + dff = ff; + dffclock = port(ff, \CLK); + dffreset = (ff->type.in($dff, $dffe) ? 
RTLIL::S0 : port(ff, \ARST)); + dffQ = argD; + dffQ.replace(port(ff, \D), port(ff, \Q)); + + // Abandon matches when 'Q' has a defined init attribute set + // (not supported by DSPv2) + for (auto c : dffQ.chunks()) { + Const init = c.wire->attributes.at(\init, Const()); + if (!init.empty()) + for (auto b : init.extract(c.offset, c.width)) + if (b != State::Sx) + reject; + } + + { + // Rewire retired flip-flop slice + SigSpec D = port(ff, \D); + SigSpec Q = port(ff, \Q); + D.replace(argD, module->addWire(NEW_ID, argD.size()), &Q); + D.replace(argD, Const(RTLIL::Sx, argD.size())); + ff->setPort(\D, D); + ff->setPort(\Q, Q); + } +endcode + +pattern ql_dsp_cascade + +match dsp1 + select dsp1->type.in(\dspv2_32x18x64_cfg_ports, \dspv2_16x9x32_cfg_ports) + filter !dsp1->hasPort(\z_cout_o) || nusers(port(dsp1, \z_cout_o)) == 1 +endmatch + +match dsp2 + select dsp2->type.in(\dspv2_32x18x64_cfg_ports, \dspv2_16x9x32_cfg_ports) + filter port(dsp2, \output_select_i).is_fully_const() + define output_sel port(dsp2, \output_select_i).as_int() + filter output_sel == 0 || (output_sel == 4 && !param(dsp2, \M_REG).as_bool()) + // expect `dsp2` and `add` for exclusive users + filter nusers(port(dsp2, \z_o)) == 2 + filter !dsp2->hasPort(\z_cout_o) || nusers(port(dsp2, \z_cout_o)) == 1 + filter dsp1 != dsp2 +endmatch + +match add + select add->type.in($add, $sub) + define width param(add, \Y_WIDTH).as_int() + + index port(add, \A)[0] === port(dsp1, \z_o)[0] + filter port(add, \A).size() >= width && port(dsp1, \z_o).size() >= width + filter port(add, \A).extract(0, width) == port(dsp1, \z_o).extract(0, width) + + index port(add, \B)[0] === port(dsp2, \z_o)[0] + filter port(add, \B).size() >= width && port(dsp2, \z_o).size() >= width + filter port(add, \B).extract(0, width) == port(dsp2, \z_o).extract(0, width) +endmatch + +code + const int z_width = 50; + + log("%s: inferring post-adder from %s (type %s)\n", log_id(dsp2), log_id(add), log_id(add->type)); + if (promote(module, dsp1)) + 
log(" - promoting %s to non-fractured DSP block\n", log_id(dsp1)); + if (promote(module, dsp2)) + log(" - promoting %s to non-fractured DSP block\n", log_id(dsp2)); + + // link up z_cout_o of dsp1 to z_cin_i of dsp2 + Wire *link = module->addWire(NEW_ID, z_width); + dsp1->setPort(\z_cout_o, link); + dsp2->setPort(\z_cin_i, link); + + // configure the path inside dsp2 + if (port(dsp2, \output_select_i).as_int() == 4) { + log("%s: inferring M register\n", log_id(dsp2)); + dsp2->setParam(\M_REG, Const(1, 1)); + } + dsp2->setParam(\SUBTRACT, Const(add->type == $sub, 1)); + dsp2->setPort(\feedback_i, Const(3, 3)); + dsp2->setPort(\output_select_i, Const(3, 3)); + dsp2->setParam(\ROUND, Const(0, 3)); + dsp2->setParam(\SHIFT_REG, Const(0, 6)); + dsp2->setParam(\SATURATE, Const(0, 1)); + dsp2->setParam(\ZCIN_REG, Const(1, 1)); + dsp2->setPort(\z_o, {port(dsp2, \z_o).extract_end(port(add, \Y).size()), port(add, \Y)}); + + did_something = true; + autoremove(add); + accept; +endcode diff --git a/techlibs/quicklogic/ql_dsp_io_regs.cc b/techlibs/quicklogic/ql_dsp_io_regs.cc index ecf163dbf..922feadd0 100644 --- a/techlibs/quicklogic/ql_dsp_io_regs.cc +++ b/techlibs/quicklogic/ql_dsp_io_regs.cc @@ -44,20 +44,32 @@ struct QlDspIORegs : public Pass { log("\n"); log("This pass looks for QL_DSP2 cells and changes their cell type depending on their\n"); log("configuration.\n"); + log("\n"); + log(" -dspv2\n"); + log(" target DSPv2.\n"); + log("\n"); } void execute(std::vector a_Args, RTLIL::Design *a_Design) override { log_header(a_Design, "Executing QL_DSP_IO_REGS pass.\n"); + bool target_dspv2 = false; size_t argidx; for (argidx = 1; argidx < a_Args.size(); argidx++) { + if (a_Args[argidx] == "-dspv2") { + target_dspv2 = true; + continue; + } break; } extra_args(a_Args, argidx, a_Design); for (auto module : a_Design->selected_modules()) { - ql_dsp_io_regs_pass(module); + if (target_dspv2) + ql_dsp_io_regs_pass_v2(module); + else + ql_dsp_io_regs_pass(module); } } @@ -127,6 +139,7 
@@ struct QlDspIORegs : public Pass { } // Set new type name + log_debug("Converted %s to %s\n", log_id(cell->type), new_type.c_str()); cell->type = RTLIL::IdString(new_type); std::vector ports2del; @@ -152,6 +165,146 @@ struct QlDspIORegs : public Pass { } } } + + + void ql_dsp_io_regs_pass_v2(Module *module) + { + sigmap.set(module); + + for (auto cell : module->cells()) { + if (cell->type != ID(QL_DSPV2)) + continue; + + // If the cell does not have the "is_inferred" attribute set + // then don't touch it. + if (!cell->get_bool_attribute(ID(is_inferred))) + continue; + + if (!cell->hasPort(ID(output_select)) || + !sigmap(cell->getPort(ID(output_select))).is_fully_def() || + !cell->hasParam(ID(MODE_BITS)) || + cell->getParam(ID(MODE_BITS)).size() != 72) { + log_error("Missing configuration tie-offs or parameters on DSP cell %s\n", + log_id(cell)); + } + int out_sel_i = sigmap(cell->getPort(ID(output_select))).as_int(); + Const mode = cell->getParam(ID(MODE_BITS)); + + // Get the feedback port + if (!cell->hasPort(ID(feedback))) + log_error("Missing 'feedback' port on %s", log_id(cell)); + SigSpec feedback = sigmap(cell->getPort(ID(feedback))); + + bool a_reg = mode[61] != RTLIL::S0; + bool b_reg = mode[63] != RTLIL::S0; + + // Build new type name + std::string new_type = "\\QL_DSPV2_MULT"; + + // Decide if we should be deleting the clock port + bool del_clk = true; + + if (a_reg != b_reg) { + // no specialized type for mixed scenario + continue; + } + + enum { + MULT, + MULTADD, + MULTACC, + Unrecognized + } base_function = Unrecognized; + + switch (out_sel_i) { + case 0: + case 4: + base_function = MULT; + break; + case 1: + case 5: + case 2: + case 3: + case 6: + case 7: + if (feedback.is_fully_def() && (feedback.as_int() == 2 || feedback.as_int() == 3)) { + del_clk = false; + new_type += "ADD"; + base_function = MULTADD; + break; + } else if (feedback.extract(1, 2).is_fully_zero()) { + del_clk = false; + new_type += "ACC"; + base_function = MULTACC; + break; + 
} else { + base_function = Unrecognized; + } + break; + default: + break; + } + + if (base_function == Unrecognized) { + continue; + } + + if (a_reg && b_reg) { + del_clk = false; + new_type += "_REGIN"; + } + + if (out_sel_i > 3) { + del_clk = false; + new_type += "_REGOUT"; + } + + // Set new type name + log_debug("Converted %s to %s\n", log_id(cell->type), new_type.c_str()); + cell->type = RTLIL::IdString(new_type); + + std::vector ports2del; + + if (del_clk) { + cell->unsetPort(ID(clk)); + cell->unsetPort(ID(reset)); + } + + switch (base_function) { + case MULTACC: { + static const std::vector to_del = { + ID(c), ID(a_cin), ID(b_cin), ID(z_cin), + ID(a_cout), ID(b_cout) + }; + + for (auto port : to_del) + cell->unsetPort(port); + break; + } + case MULTADD: { + static const std::vector to_del = { + ID(c), ID(a_cin), ID(b_cin), ID(a_cout), ID(b_cout) + }; + + for (auto port : to_del) + cell->unsetPort(port); + break; + } + case MULT: { + static const std::vector to_del = { + ID(c), ID(load_acc), ID(acc_reset), + ID(a_cin), ID(b_cin), ID(z_cin), ID(a_cout), ID(b_cout) + }; + + for (auto port : to_del) + cell->unsetPort(port); + break; + } + default: + ; + } + } + } } QlDspIORegs; PRIVATE_NAMESPACE_END diff --git a/techlibs/quicklogic/ql_dsp_macc.cc b/techlibs/quicklogic/ql_dsp_macc.cc index f0669da6c..b4e17f04b 100644 --- a/techlibs/quicklogic/ql_dsp_macc.cc +++ b/techlibs/quicklogic/ql_dsp_macc.cc @@ -27,158 +27,303 @@ PRIVATE_NAMESPACE_BEGIN // ============================================================================ -static void create_ql_macc_dsp(ql_dsp_macc_pm &pm) +static void create_ql_macc_dsp_v1(ql_dsp_macc_pm &pm) { - auto &st = pm.st_ql_dsp_macc; + auto &st = pm.st_ql_dsp_macc; - // Get port widths - size_t a_width = GetSize(st.mul->getPort(ID(A))); - size_t b_width = GetSize(st.mul->getPort(ID(B))); - size_t z_width = GetSize(st.ff->getPort(ID(Q))); + // Get port widths + size_t a_width = GetSize(st.mul->getPort(ID::A)); + size_t b_width = 
GetSize(st.mul->getPort(ID::B)); + size_t z_width = GetSize(st.ff->getPort(ID::Q)); - size_t min_width = std::min(a_width, b_width); - size_t max_width = std::max(a_width, b_width); + size_t min_width = std::min(a_width, b_width); + size_t max_width = std::max(a_width, b_width); - // Signed / unsigned - bool ab_signed = st.mul->getParam(ID(A_SIGNED)).as_bool(); - log_assert(ab_signed == st.mul->getParam(ID(B_SIGNED)).as_bool()); + // Signed / unsigned + bool ab_signed = st.mul->getParam(ID::A_SIGNED).as_bool(); + log_assert(ab_signed == st.mul->getParam(ID::B_SIGNED).as_bool()); - // Determine DSP type or discard if too narrow / wide - RTLIL::IdString type; - size_t tgt_a_width; - size_t tgt_b_width; - size_t tgt_z_width; + // Determine DSP type or discard if too narrow / wide + RTLIL::IdString type; + size_t tgt_a_width; + size_t tgt_b_width; + size_t tgt_z_width; - string cell_base_name = "dsp_t1"; - string cell_size_name = ""; - string cell_cfg_name = ""; - string cell_full_name = ""; + string cell_base_name = "dsp_t1"; + string cell_size_name = ""; + string cell_cfg_name = ""; + string cell_full_name = ""; - if (min_width <= 2 && max_width <= 2 && z_width <= 4) { - log_debug("\trejected: too narrow (%zd %zd %zd)\n", min_width, max_width, z_width); - return; - } else if (min_width <= 9 && max_width <= 10 && z_width <= 19) { - cell_size_name = "_10x9x32"; - tgt_a_width = 10; - tgt_b_width = 9; - tgt_z_width = 19; - } else if (min_width <= 18 && max_width <= 20 && z_width <= 38) { - cell_size_name = "_20x18x64"; - tgt_a_width = 20; - tgt_b_width = 18; - tgt_z_width = 38; - } else { - log_debug("\trejected: too wide (%zd %zd %zd)\n", min_width, max_width, z_width); - return; - } + if (min_width <= 2 && max_width <= 2 && z_width <= 4) { + log_debug("\trejected: too narrow (%zd %zd %zd)\n", min_width, max_width, z_width); + return; + } else if (min_width <= 9 && max_width <= 10 && z_width <= 19) { + cell_size_name = "_10x9x32"; + tgt_a_width = 10; + tgt_b_width = 9; 
+ tgt_z_width = 19; + } else if (min_width <= 18 && max_width <= 20 && z_width <= 38) { + cell_size_name = "_20x18x64"; + tgt_a_width = 20; + tgt_b_width = 18; + tgt_z_width = 38; + } else { + log_debug("\trejected: too wide (%zd %zd %zd)\n", min_width, max_width, z_width); + return; + } - type = RTLIL::escape_id(cell_base_name + cell_size_name + "_cfg_ports"); - log("Inferring MACC %zux%zu->%zu as %s from:\n", a_width, b_width, z_width, log_id(type)); + type = RTLIL::escape_id(cell_base_name + cell_size_name + "_cfg_ports"); + log("Inferring MACC %zux%zu->%zu as %s from:\n", a_width, b_width, z_width, log_id(type)); - for (auto cell : {st.mul, st.add, st.mux, st.ff}) - if (cell) - log(" %s (%s)\n", log_id(cell), log_id(cell->type)); + for (auto cell : {st.mul, st.add, st.mux, st.ff}) + if (cell) + log(" %s (%s)\n", log_id(cell), log_id(cell->type)); - // Add the DSP cell - RTLIL::Cell *cell = pm.module->addCell(NEW_ID, type); + // Add the DSP cell + RTLIL::Cell *cell = pm.module->addCell(NEW_ID, type); - // Set attributes - cell->set_bool_attribute(ID(is_inferred), true); + // Set attributes + cell->set_bool_attribute(ID(is_inferred), true); - // Get input/output data signals - RTLIL::SigSpec sig_a, sig_b, sig_z; - sig_a = st.mul->getPort(ID(A)); - sig_b = st.mul->getPort(ID(B)); - sig_z = st.output_registered ? st.ff->getPort(ID(Q)) : st.ff->getPort(ID(D)); + // Get input/output data signals + RTLIL::SigSpec sig_a, sig_b, sig_z; + sig_a = st.mul->getPort(ID::A); + sig_b = st.mul->getPort(ID::B); + sig_z = st.output_registered ? 
st.ff->getPort(ID::Q) : st.ff->getPort(ID::D); - if (a_width < b_width) - std::swap(sig_a, sig_b); + if (a_width < b_width) + std::swap(sig_a, sig_b); - // Connect input data ports, sign extend / pad with zeros - sig_a.extend_u0(tgt_a_width, ab_signed); - sig_b.extend_u0(tgt_b_width, ab_signed); - cell->setPort(ID(a_i), sig_a); - cell->setPort(ID(b_i), sig_b); + // Connect input data ports, sign extend / pad with zeros + sig_a.extend_u0(tgt_a_width, ab_signed); + sig_b.extend_u0(tgt_b_width, ab_signed); + cell->setPort(ID(a_i), sig_a); + cell->setPort(ID(b_i), sig_b); - // Connect output data port, pad if needed - if ((size_t) GetSize(sig_z) < tgt_z_width) { - auto *wire = pm.module->addWire(NEW_ID, tgt_z_width - GetSize(sig_z)); - sig_z.append(wire); - } - cell->setPort(ID(z_o), sig_z); + // Connect output data port, pad if needed + if ((size_t) GetSize(sig_z) < tgt_z_width) { + auto *wire = pm.module->addWire(NEW_ID, tgt_z_width - GetSize(sig_z)); + sig_z.append(wire); + } + cell->setPort(ID(z_o), sig_z); - // Connect clock, reset and enable - cell->setPort(ID(clock_i), st.ff->getPort(ID(CLK))); + // Connect clock, reset and enable + cell->setPort(ID(clock_i), st.ff->getPort(ID::CLK)); - RTLIL::SigSpec rst; - RTLIL::SigSpec ena; + RTLIL::SigSpec rst; + RTLIL::SigSpec ena; - if (st.ff->hasPort(ID(ARST))) { - if (st.ff->getParam(ID(ARST_POLARITY)).as_int() != 1) { - rst = pm.module->Not(NEW_ID, st.ff->getPort(ID(ARST))); - } else { - rst = st.ff->getPort(ID(ARST)); - } - } else { - rst = RTLIL::SigSpec(RTLIL::S0); - } + if (st.ff->hasPort(ID::ARST)) { + if (st.ff->getParam(ID::ARST_POLARITY).as_int() != 1) { + rst = pm.module->Not(NEW_ID, st.ff->getPort(ID::ARST)); + } else { + rst = st.ff->getPort(ID::ARST); + } + } else { + rst = RTLIL::SigSpec(RTLIL::S0); + } - if (st.ff->hasPort(ID(EN))) { - if (st.ff->getParam(ID(EN_POLARITY)).as_int() != 1) { - ena = pm.module->Not(NEW_ID, st.ff->getPort(ID(EN))); - } else { - ena = st.ff->getPort(ID(EN)); - } - } else { - 
ena = RTLIL::SigSpec(RTLIL::S1); - } + if (st.ff->hasPort(ID::EN)) { + if (st.ff->getParam(ID::EN_POLARITY).as_int() != 1) { + ena = pm.module->Not(NEW_ID, st.ff->getPort(ID::EN)); + } else { + ena = st.ff->getPort(ID::EN); + } + } else { + ena = RTLIL::SigSpec(RTLIL::S1); + } - cell->setPort(ID(reset_i), rst); - cell->setPort(ID(load_acc_i), ena); + cell->setPort(ID(reset_i), rst); + cell->setPort(ID(load_acc_i), ena); - // Insert feedback_i control logic used for clearing / loading the accumulator - if (st.mux_in_pattern) { - RTLIL::SigSpec sig_s = st.mux->getPort(ID(S)); + // Insert feedback_i control logic used for clearing / loading the accumulator + if (st.mux_in_pattern) { + RTLIL::SigSpec sig_s = st.mux->getPort(ID::S); - // Depending on the mux port ordering insert inverter if needed - log_assert(st.mux_ab.in(ID(A), ID(B))); - if (st.mux_ab == ID(A)) - sig_s = pm.module->Not(NEW_ID, sig_s); + // Depending on the mux port ordering insert inverter if needed + log_assert(st.mux_ab.in(ID::A, ID::B)); + if (st.mux_ab == ID::A) + sig_s = pm.module->Not(NEW_ID, sig_s); - // Assemble the full control signal for the feedback_i port - RTLIL::SigSpec sig_f; - sig_f.append(sig_s); - sig_f.append(RTLIL::S0); - sig_f.append(RTLIL::S0); - cell->setPort(ID(feedback_i), sig_f); - } - // No acc clear/load - else { - cell->setPort(ID(feedback_i), RTLIL::SigSpec(RTLIL::S0, 3)); - } + // Assemble the full control signal for the feedback_i port + RTLIL::SigSpec sig_f; + sig_f.append(sig_s); + sig_f.append(RTLIL::S0); + sig_f.append(RTLIL::S0); + cell->setPort(ID(feedback_i), sig_f); + } + // No acc clear/load + else { + cell->setPort(ID(feedback_i), RTLIL::SigSpec(RTLIL::S0, 3)); + } - // Connect control ports - cell->setPort(ID(unsigned_a_i), RTLIL::SigSpec(ab_signed ? RTLIL::S0 : RTLIL::S1)); - cell->setPort(ID(unsigned_b_i), RTLIL::SigSpec(ab_signed ? RTLIL::S0 : RTLIL::S1)); + // Connect control ports + cell->setPort(ID(unsigned_a_i), RTLIL::SigSpec(ab_signed ? 
RTLIL::S0 : RTLIL::S1)); + cell->setPort(ID(unsigned_b_i), RTLIL::SigSpec(ab_signed ? RTLIL::S0 : RTLIL::S1)); - // Connect config bits - cell->setPort(ID(saturate_enable_i), RTLIL::SigSpec(RTLIL::S0)); - cell->setPort(ID(shift_right_i), RTLIL::SigSpec(RTLIL::S0, 6)); - cell->setPort(ID(round_i), RTLIL::SigSpec(RTLIL::S0)); - cell->setPort(ID(register_inputs_i), RTLIL::SigSpec(RTLIL::S0)); - // 3 - output post acc; 1 - output pre acc - cell->setPort(ID(output_select_i), RTLIL::Const(st.output_registered ? 1 : 3, 3)); + // Connect config bits + cell->setPort(ID(saturate_enable_i), RTLIL::SigSpec(RTLIL::S0)); + cell->setPort(ID(shift_right_i), RTLIL::SigSpec(RTLIL::S0, 6)); + cell->setPort(ID(round_i), RTLIL::SigSpec(RTLIL::S0)); + cell->setPort(ID(register_inputs_i), RTLIL::SigSpec(RTLIL::S0)); + // 3 - output post acc; 1 - output pre acc + cell->setPort(ID(output_select_i), RTLIL::Const(st.output_registered ? 1 : 3, 3)); - bool subtract = (st.add->type == ID($sub)); - cell->setPort(ID(subtract_i), RTLIL::SigSpec(subtract ? RTLIL::S1 : RTLIL::S0)); + bool subtract = (st.add->type == ID($sub)); + cell->setPort(ID(subtract_i), RTLIL::SigSpec(subtract ? 
RTLIL::S1 : RTLIL::S0)); - // Mark the cells for removal - pm.autoremove(st.mul); - pm.autoremove(st.add); - if (st.mux != nullptr) { - pm.autoremove(st.mux); - } - pm.autoremove(st.ff); + // Mark the cells for removal + pm.autoremove(st.mul); + pm.autoremove(st.add); + if (st.mux != nullptr) { + pm.autoremove(st.mux); + } + pm.autoremove(st.ff); +} + +void create_ql_macc_dsp_v2(ql_dsp_macc_pm &pm) +{ + auto &st = pm.st_ql_dsp_macc; + + SigSpec sig_a = st.mul->getPort(ID::A); + SigSpec sig_b = st.mul->getPort(ID::B); + + if (sig_a.size() < sig_b.size()) + std::swap(sig_a, sig_b); + + // Signed / unsigned + bool ab_signed = st.mul->getParam(ID::A_SIGNED).as_bool(); + log_assert(ab_signed == st.mul->getParam(ID::B_SIGNED).as_bool()); + + int z_width = GetSize(st.ff->getPort(ID::Q)); + if (!ab_signed) { + if (sig_a.msb() != RTLIL::S0 && sig_a.size() < z_width) + sig_a.append(RTLIL::S0); + if (sig_b.msb() != RTLIL::S0 && sig_b.size() < z_width) + sig_b.append(RTLIL::S0); + } + int a_width = GetSize(sig_a); + int b_width = GetSize(sig_b); + + // Determine DSP type or discard if too narrow / wide + RTLIL::IdString type; + size_t tgt_a_width; + size_t tgt_b_width; + size_t tgt_z_width; + + string cell_base_name = "dspv2"; + string cell_size_name = ""; + string cell_cfg_name = ""; + string cell_full_name = ""; + + if (a_width <= 2 && b_width <= 2 && z_width <= 4) { + log_debug("\trejected: too narrow (%d %d %d)\n", a_width, b_width, z_width); + return; + } else if (a_width <= 16 && b_width <= 9 && z_width <= 25) { + cell_size_name = "_16x9x32"; + tgt_a_width = 16; + tgt_b_width = 9; + tgt_z_width = 25; // TODO + } else if (a_width <= 32 && b_width <= 18 && z_width <= 50) { + cell_size_name = "_32x18x64"; + tgt_a_width = 32; + tgt_b_width = 18; + tgt_z_width = 50; + } else { + log_debug("\trejected: too wide (%d %d %d)\n", a_width, b_width, z_width); + return; + } + + type = RTLIL::escape_id(cell_base_name + cell_size_name + "_cfg_ports"); + log("Inferring MACC %dx%d->%d as 
%s from:\n", a_width, b_width, z_width, log_id(type)); + + for (auto cell : {st.mul, st.add, st.mux, st.ff}) + if (cell) + log(" %s (%s)\n", log_id(cell), log_id(cell->type)); + + // Add the DSP cell + RTLIL::Cell *cell = pm.module->addCell(NEW_ID, type); + + // Set attributes + cell->set_bool_attribute(ID(is_inferred), true); + + // Get input/output data signals + SigSpec sig_z; + sig_z = st.output_registered ? st.ff->getPort(ID::Q) : st.ff->getPort(ID::D); + + // Connect input data ports, sign extend / pad with zeros + sig_a.extend_u0(tgt_a_width, true); + sig_b.extend_u0(tgt_b_width, true); + cell->setPort(ID(a_i), sig_a); + cell->setPort(ID(b_i), sig_b); + cell->setPort(ID(c_i), SigSpec(RTLIL::S0, tgt_b_width)); + + // Connect output data port, pad if needed + if ((size_t) GetSize(sig_z) < tgt_z_width) { + auto *wire = pm.module->addWire(NEW_ID, tgt_z_width - GetSize(sig_z)); + sig_z.append(wire); + } + cell->setPort(ID(z_o), sig_z); + + // Connect clock, reset and enable + cell->setPort(ID(clock_i), st.ff->getPort(ID::CLK)); + + RTLIL::SigSpec rst; + RTLIL::SigSpec ena; + + if (st.ff->hasPort(ID::ARST)) { + if (st.ff->getParam(ID::ARST_POLARITY).as_int() != 1) { + rst = pm.module->Not(NEW_ID, st.ff->getPort(ID::ARST)); + } else { + rst = st.ff->getPort(ID::ARST); + } + } else { + rst = RTLIL::SigSpec(RTLIL::S0); + } + + if (st.ff->hasPort(ID::EN)) { + if (st.ff->getParam(ID::EN_POLARITY).as_int() != 1) { + ena = pm.module->Not(NEW_ID, st.ff->getPort(ID::EN)); + } else { + ena = st.ff->getPort(ID::EN); + } + } else { + ena = RTLIL::SigSpec(RTLIL::S1); + } + + cell->setPort(ID(reset_i), rst); + cell->setPort(ID(load_acc_i), ena); + + // Insert feedback_i control logic used for clearing / loading the accumulator + if (st.mux_in_pattern) { + RTLIL::SigSpec sig_s = st.mux->getPort(ID::S); + + // Depending on the mux port ordering insert inverter if needed + log_assert(st.mux_ab.in(ID::A, ID::B)); + if (st.mux_ab == ID::A) + sig_s = pm.module->Not(NEW_ID, sig_s); + 
+ cell->setPort(ID(feedback_i), {RTLIL::S0, RTLIL::S0, sig_s}); + } + // No acc clear/load + else { + cell->setPort(ID(feedback_i), RTLIL::SigSpec(RTLIL::S0, 3)); + } + + cell->setPort(ID(acc_reset_i), RTLIL::SigSpec(RTLIL::S0)); + // 3 - output post acc; 1 - output pre acc + cell->setPort(ID(output_select_i), RTLIL::Const(st.output_registered ? 1 : 3, 3)); + + bool subtract = (st.add->type == ID($sub)); + cell->setParam(ID(SUBTRACT), RTLIL::Const(subtract ? RTLIL::S1 : RTLIL::S0)); + + // Mark the cells for removal + pm.autoremove(st.mul); + pm.autoremove(st.add); + if (st.mux != nullptr) { + pm.autoremove(st.mux); + } + pm.autoremove(st.ff); } struct QlDspMacc : public Pass { @@ -186,27 +331,41 @@ struct QlDspMacc : public Pass { void help() override { - // |---v---|---v---|---v---|---v---|---v---|---v---|---v---|---v---|---v---|---v---| + // |---v---|---v---|---v---|---v---|---v---|---v---|---v---|---v---|---v---|---v---| log("\n"); log(" ql_dsp_macc [selection]\n"); - log("\n"); - log("This pass looks for a multiply-accumulate pattern based on which it infers a\n"); - log("QuickLogic DSP cell.\n"); + log("\n"); + log("This pass looks for a multiply-accumulate pattern based on which it infers a\n"); + log("QuickLogic DSP cell.\n"); + log("\n"); + log(" -dspv2\n"); + log(" target DSPv2.\n"); log("\n"); } - void execute(std::vector a_Args, RTLIL::Design *a_Design) override + void execute(std::vector args, RTLIL::Design *design) override { - log_header(a_Design, "Executing QL_DSP_MACC pass.\n"); + log_header(design, "Executing QL_DSP_MACC pass.\n"); + bool target_dspv2 = false; size_t argidx; - for (argidx = 1; argidx < a_Args.size(); argidx++) { + for (argidx = 1; argidx < args.size(); argidx++) { + if (args[argidx] == "-dspv2") { + target_dspv2 = true; + continue; + } break; } - extra_args(a_Args, argidx, a_Design); + extra_args(args, argidx, design); - for (auto module : a_Design->selected_modules()) - ql_dsp_macc_pm(module, 
module->selected_cells()).run_ql_dsp_macc(create_ql_macc_dsp); + for (auto module : design->selected_modules()) { + ql_dsp_macc_pm pm(module, module->selected_cells()); + + if (target_dspv2) + pm.run_ql_dsp_macc(create_ql_macc_dsp_v2); + else + pm.run_ql_dsp_macc(create_ql_macc_dsp_v1); + } } } QlDspMacc; diff --git a/techlibs/quicklogic/ql_dsp_simd.cc b/techlibs/quicklogic/ql_dsp_simd.cc index fdd2de406..86722c66b 100644 --- a/techlibs/quicklogic/ql_dsp_simd.cc +++ b/techlibs/quicklogic/ql_dsp_simd.cc @@ -29,7 +29,7 @@ PRIVATE_NAMESPACE_BEGIN struct QlDspSimdPass : public Pass { - QlDspSimdPass() : Pass("ql_dsp_simd", "merge QuickLogic K6N10f DSP pairs to operate in SIMD mode") {} + QlDspSimdPass() : Pass("ql_dsp_simd", "merge QuickLogic K6N10f DSP pairs to operate in fractured mode") {} void help() override { @@ -37,16 +37,17 @@ struct QlDspSimdPass : public Pass { log("\n"); log(" ql_dsp_simd [selection]\n"); log("\n"); - log("This pass identifies K6N10f DSP cells with identical configuration and pack pairs\n"); - log("of them together into other DSP cells that can perform SIMD operation.\n"); + log("This pass identifies K6N10f DSP cells with identical configuration and merges\n"); + log("pairs of them, enabling fractured mode.\n"); } // .......................................... - /// Describes DSP config unique to a whole DSP cell + /// Describes DSP config unique to a DSP cell struct DspConfig { // Port connections dict<RTLIL::IdString, RTLIL::SigSpec> connections; + dict<RTLIL::IdString, RTLIL::Const> parameters; DspConfig() = default; @@ -55,60 +56,136 @@ struct QlDspSimdPass : public Pass { - [[nodiscard]] Hasher hash_into(Hasher h) const { h.eat(connections); return h; } + [[nodiscard]] Hasher hash_into(Hasher h) const { h.eat(connections); h.eat(parameters); return h; } // hash every field that operator== compares - bool operator==(const DspConfig &ref) const { return connections == ref.connections; } + bool operator==(const DspConfig &ref) const { return connections == ref.connections && parameters == ref.parameters; } }; // .......................................... 
- const int m_ModeBitsSize = 80; - - // DSP parameters - const std::vector m_DspParams = {"COEFF_3", "COEFF_2", "COEFF_1", "COEFF_0"}; - /// Temporary SigBit to SigBit helper map. SigMap sigmap; + static bool is_cascade(const Cell* cell) + { + static const std::vector cascade_ports = { + ID(a_cout_o), + ID(b_cout_o), + ID(z_cout_o), + ID(a_cin_i), + ID(b_cin_i), + ID(z_cin_i) + }; + for (auto p : cascade_ports) { + if (cell->hasPort(p) && !cell->getPort(p).is_fully_undef()) + return true; + } + return false; + } // .......................................... void execute(std::vector a_Args, RTLIL::Design *a_Design) override { log_header(a_Design, "Executing QL_DSP_SIMD pass.\n"); - // DSP control and config ports to consider and how to map them to ports - // of the target DSP cell - static const std::vector> m_DspCfgPorts = { - std::make_pair(ID(clock_i), ID(clk)), - std::make_pair(ID(reset_i), ID(reset)), - std::make_pair(ID(feedback_i), ID(feedback)), - std::make_pair(ID(load_acc_i), ID(load_acc)), - std::make_pair(ID(unsigned_a_i), ID(unsigned_a)), - std::make_pair(ID(unsigned_b_i), ID(unsigned_b)), - std::make_pair(ID(subtract_i), ID(subtract)), - std::make_pair(ID(output_select_i), ID(output_select)), - std::make_pair(ID(saturate_enable_i), ID(saturate_enable)), - std::make_pair(ID(shift_right_i), ID(shift_right)), - std::make_pair(ID(round_i), ID(round)), - std::make_pair(ID(register_inputs_i), ID(register_inputs)) + // The following lists have to match simulation model interfaces. + + // DSP control and config ports that must be equal between + // merged half-blocks + // In addition to functional differences, + // v1 and v2 have different balance between shared functionality + // in ports vs params. 
+ static const std::vector<IdString> m_Dspv1CfgPorts = { + ID(acc_fir_i), + ID(feedback_i), + ID(load_acc_i), + ID(unsigned_a_i), + ID(unsigned_b_i), + ID(clock_i), + ID(reset_i), // reset port of the cfg_ports cells (cf. dsp_t1_20x18x64_cfg_ports_fracturable) + ID(saturate_enable_i), + ID(output_select_i), + ID(round_i), + ID(shift_right_i), + ID(subtract_i), + ID(register_inputs_i), + }; + static const std::vector<IdString> m_Dspv1CfgParams = { + ID(COEFF_0), + ID(COEFF_1), + ID(COEFF_2), + ID(COEFF_3), + }; + static const std::vector<IdString> m_Dspv2CfgPorts = { + ID(clock_i), + ID(reset_i), + ID(acc_reset_i), + ID(feedback_i), + ID(load_acc_i), + ID(output_select_i), + }; + static const std::vector<IdString> m_Dspv2CfgParams = { + ID(COEFF_0), + ID(ACC_FIR), + ID(ROUND), + ID(ZC_SHIFT), + ID(ZREG_SHIFT), + ID(SHIFT_REG), + ID(SATURATE), + ID(SUBTRACT), + ID(PRE_ADD), + ID(A_SEL), + ID(A_REG), + ID(B_SEL), + ID(B_REG), + ID(C_REG), + ID(BC_REG), + ID(M_REG), + ID(FRAC_MODE), }; - // DSP data ports and how to map them to ports of the target DSP cell - static const std::vector<std::pair<IdString, IdString>> m_DspDataPorts = { - std::make_pair(ID(a_i), ID(a)), - std::make_pair(ID(b_i), ID(b)), - std::make_pair(ID(acc_fir_i), ID(acc_fir)), - std::make_pair(ID(z_o), ID(z)), - std::make_pair(ID(dly_b_o), ID(dly_b)) + + // Data ports to be concatenated into merged cell + static const std::vector<IdString> m_Dspv1DataPorts = { + ID(a_i), + ID(b_i), + ID(z_o), + ID(dly_b_o), + }; + static const std::vector<IdString> m_Dspv2DataPorts = { + ID(a_i), + ID(b_i), + ID(c_i), + ID(z_o), }; - // Source DSP cell type (SISD) - static const IdString m_SisdDspType = ID(dsp_t1_10x9x32); + // Source DSP cell type (half-block) + static const IdString m_Dspv1SisdType = ID(dsp_t1_10x9x32_cfg_ports); + static const IdString m_Dspv2SisdType = ID(dspv2_16x9x32_cfg_ports); - // Target DSP cell types for the SIMD mode - static const IdString m_SimdDspType = ID(QL_DSP2); + // Target DSP cell types (full-block) + static const IdString m_Dspv1SimdType = ID(dsp_t1_20x18x64_cfg_ports_fracturable); + static const IdString m_Dspv2SimdType = 
ID(dspv2_32x18x64_cfg_ports); // Parse args - extra_args(a_Args, 1, a_Design); + int dsp_version = 1; + size_t argidx; + for (argidx = 1; argidx < a_Args.size(); argidx++) { + if (a_Args[argidx] == "-dspv2") { + dsp_version = 2; + continue; + } + break; + } + extra_args(a_Args, argidx, a_Design); + log_assert(dsp_version < 3); + log_assert(dsp_version > 0); + const auto& cfg_ports = (dsp_version == 1) ? m_Dspv1CfgPorts : m_Dspv2CfgPorts; + const auto& cfg_params = (dsp_version == 1) ? m_Dspv1CfgParams : m_Dspv2CfgParams; + const auto& data_ports = (dsp_version == 1) ? m_Dspv1DataPorts : m_Dspv2DataPorts; + auto half_dsp = (dsp_version == 1) ? m_Dspv1SisdType : m_Dspv2SisdType; + auto full_dsp = (dsp_version == 1) ? m_Dspv1SimdType : m_Dspv2SimdType; + + int cellsMerged = 0; // Process modules for (auto module : a_Design->selected_modules()) { // Setup the SigMap @@ -118,25 +195,33 @@ struct QlDspSimdPass : public Pass { dict> groups; for (auto cell : module->selected_cells()) { // Check if this is a DSP cell we are looking for (type starts with m_SisdDspType) - if (cell->type != m_SisdDspType) + if (cell->type != half_dsp) continue; // Skip if it has the (* keep *) attribute set - if (cell->has_keep_attr()) + if (cell->has_keep_attr()) { + log_debug("skip %s because it's marked keep\n", log_id(cell)); continue; + } + + // Skip if it has cascading + if (is_cascade(cell)) { + log_debug("skip %s because it's cascading\n", log_id(cell)); + continue; + } // Add to a group - const auto key = getDspConfig(cell, m_DspCfgPorts); + const auto key = getDspConfig(cell, cfg_ports, cfg_params); groups[key].push_back(cell); } + log_debug("Checking %zu detected mode-equivalent DSP cell classes\n", groups.size()); std::vector cellsToRemove; - // Map cell pairs to the target DSP SIMD cell for (const auto &it : groups) { const auto &group = it.second; const auto &config = it.first; - + log_debug("Checking %zu half-blocks\n", group.size()); // Ensure an even number size_t count = 
group.size(); if (count & 1) @@ -148,7 +233,7 @@ struct QlDspSimdPass : public Pass { Cell *dsp_b = group[i + 1]; // Create the new cell - Cell *simd = module->addCell(NEW_ID, m_SimdDspType); + Cell *simd = module->addCell(NEW_ID, full_dsp); log(" SIMD: %s (%s) + %s (%s) => %s (%s)\n", log_id(dsp_a), log_id(dsp_a->type), log_id(dsp_b), log_id(dsp_b->type), log_id(simd), log_id(simd->type)); @@ -156,27 +241,36 @@ struct QlDspSimdPass : public Pass { // Check if the target cell is known (important to know // its port widths) if (!simd->known()) - log_error(" The target cell type '%s' is not known!", log_id(simd)); - + log_error(" The target cell type '%s' is not known!", log_id(simd->type)); // Connect common ports - for (const auto &it : m_DspCfgPorts) - simd->setPort(it.first, config.connections.at(it.second)); + + for (auto port : cfg_ports) { + if (config.connections.count(port)) + simd->setPort(port, config.connections.at(port)); + } + for (auto param : cfg_params) { + if (config.parameters.count(param)) + simd->setParam(param, config.parameters.at(param)); + } // Connect data ports - for (const auto &it : m_DspDataPorts) { + for (auto port : data_ports) { size_t width; bool isOutput; - std::tie(width, isOutput) = getPortInfo(simd, it.second); + std::tie(width, isOutput) = getPortInfo(simd, port); + if (!width) + log_error("Can't determine portinfo for %s\n", log_id(port)); auto getConnection = [&](const RTLIL::Cell *cell) { RTLIL::SigSpec sigspec; - if (cell->hasPort(it.first)) { - const auto &sig = cell->getPort(it.first); + if (cell->hasPort(port)) { + const auto &sig = cell->getPort(port); sigspec.append(sig); } int padding = width / 2 - sigspec.size(); + log_assert(padding >= 0); if (padding) { if (!isOutput) @@ -190,27 +284,14 @@ struct QlDspSimdPass : public Pass { RTLIL::SigSpec sigspec; sigspec.append(getConnection(dsp_a)); sigspec.append(getConnection(dsp_b)); - simd->setPort(it.second, sigspec); + simd->setPort(port, sigspec); } - // Concatenate FIR 
coefficient parameters into the single - // MODE_BITS parameter - Const mode_bits; - for (const auto &it : m_DspParams) { - auto val_a = dsp_a->getParam(it); - auto val_b = dsp_b->getParam(it); - - mode_bits.bits().insert(mode_bits.bits().end(), - val_a.begin(), val_a.end()); - mode_bits.bits().insert(mode_bits.bits().end(), - val_b.begin(), val_b.end()); - } - - // Enable the fractured mode by connecting the control - // port. - simd->setPort(ID(f_mode), State::S1); - simd->setParam(ID(MODE_BITS), mode_bits); - log_assert(mode_bits.size() == m_ModeBitsSize); + // Enable the fractured mode + if (dsp_version == 1) + simd->setPort(ID(f_mode_i), State::S1); + else + simd->setParam(ID(FRAC_MODE), State::S1); // Handle the "is_inferred" attribute. If one of the fragments // is not inferred mark the whole DSP as not inferred @@ -223,11 +304,12 @@ struct QlDspSimdPass : public Pass { cellsToRemove.push_back(dsp_b); } } - + cellsMerged += cellsToRemove.size(); // Remove old cells for (auto cell : cellsToRemove) module->remove(cell); } + log("Merged %d half-block cells\n", cellsMerged); } // .......................................... @@ -257,19 +339,24 @@ struct QlDspSimdPass : public Pass { } /// Given a DSP cell populates and returns a DspConfig struct for it. - DspConfig getDspConfig(RTLIL::Cell *a_Cell, const std::vector> &dspCfgPorts) + DspConfig getDspConfig(RTLIL::Cell *a_Cell, const std::vector &dspCfgPorts, const std::vector &dspCfgParams) { DspConfig config; - for (const auto &it : dspCfgPorts) { - auto port = it.first; - + for (auto port : dspCfgPorts) { // Port unconnected if (!a_Cell->hasPort(port)) continue; config.connections[port] = sigmap(a_Cell->getPort(port)); } + for (auto param : dspCfgParams) { + // Param unset? 
+ if (!a_Cell->hasParam(param)) + continue; + + config.parameters[param] = a_Cell->getParam(param); + } return config; } diff --git a/techlibs/quicklogic/qlf_k6n10f/dsp_final_map.v b/techlibs/quicklogic/qlf_k6n10f/dspv1_final_map.v similarity index 80% rename from techlibs/quicklogic/qlf_k6n10f/dsp_final_map.v rename to techlibs/quicklogic/qlf_k6n10f/dspv1_final_map.v index 9eae617b9..cdbc8056b 100644 --- a/techlibs/quicklogic/qlf_k6n10f/dsp_final_map.v +++ b/techlibs/quicklogic/qlf_k6n10f/dspv1_final_map.v @@ -14,6 +14,63 @@ // // SPDX-License-Identifier: Apache-2.0 +module dsp_t1_20x18x64_cfg_ports_fracturable ( + input [19:0] a_i, + input [17:0] b_i, + input [ 5:0] acc_fir_i, + output [37:0] z_o, + output [17:0] dly_b_o, + + input clock_i, + input reset_i, + + input [2:0] feedback_i, + input load_acc_i, + input unsigned_a_i, + input unsigned_b_i, + + input [2:0] output_select_i, + input saturate_enable_i, + input [5:0] shift_right_i, + input round_i, + input subtract_i, + input register_inputs_i, + input f_mode_i +); + + parameter [19:0] COEFF_0 = 20'd0; + parameter [19:0] COEFF_1 = 20'd0; + parameter [19:0] COEFF_2 = 20'd0; + parameter [19:0] COEFF_3 = 20'd0; + + QL_DSP2 # ( + .MODE_BITS ({COEFF_3, COEFF_2, COEFF_1, COEFF_0}) + ) _TECHMAP_REPLACE_ ( + .a (a_i), + .b (b_i), + .acc_fir (acc_fir_i), + .z (z_o), + .dly_b (dly_b_o), + + .clk (clock_i), + .reset (reset_i), + + .feedback (feedback_i), + .load_acc (load_acc_i), + .unsigned_a (unsigned_a_i), + .unsigned_b (unsigned_b_i), + + .f_mode (f_mode_i), + .output_select (output_select_i), + .saturate_enable (saturate_enable_i), + .shift_right (shift_right_i), + .round (round_i), + .subtract (subtract_i), + .register_inputs (register_inputs_i) + ); + +endmodule + module dsp_t1_20x18x64_cfg_ports ( input [19:0] a_i, input [17:0] b_i, @@ -42,30 +99,30 @@ module dsp_t1_20x18x64_cfg_ports ( parameter [19:0] COEFF_2 = 20'd0; parameter [19:0] COEFF_3 = 20'd0; - QL_DSP2 # ( - .MODE_BITS ({COEFF_3, COEFF_2, COEFF_1, 
COEFF_0}) + dsp_t1_20x18x64_cfg_ports_fracturable # ( + .COEFF_0 (COEFF_0), + .COEFF_1 (COEFF_1), + .COEFF_2 (COEFF_2), + .COEFF_3 (COEFF_3) ) _TECHMAP_REPLACE_ ( - .a (a_i), - .b (b_i), - .acc_fir (acc_fir_i), - .z (z_o), - .dly_b (dly_b_o), - - .clk (clock_i), - .reset (reset_i), - - .feedback (feedback_i), - .load_acc (load_acc_i), - .unsigned_a (unsigned_a_i), - .unsigned_b (unsigned_b_i), - - .f_mode (1'b0), // No fracturation - .output_select (output_select_i), - .saturate_enable (saturate_enable_i), - .shift_right (shift_right_i), - .round (round_i), - .subtract (subtract_i), - .register_inputs (register_inputs_i) + .a_i (a_i), + .b_i (b_i), + .acc_fir_i (acc_fir_i), + .z_o (z_o), + .dly_b_o (dly_b_o), + .clock_i (clock_i), + .reset_i (reset_i), + .feedback_i (feedback_i), + .load_acc_i (load_acc_i), + .unsigned_a_i (unsigned_a_i), + .unsigned_b_i (unsigned_b_i), + .output_select_i (output_select_i), + .saturate_enable_i (saturate_enable_i), + .shift_right_i (shift_right_i), + .round_i (round_i), + .subtract_i (subtract_i), + .register_inputs_i (register_inputs_i), + .f_mode_i (1'b0) ); endmodule @@ -122,7 +179,7 @@ module dsp_t1_10x9x32_cfg_ports ( .unsigned_a (unsigned_a_i), .unsigned_b (unsigned_b_i), - .f_mode (1'b1), // Enable fractuation, Use the lower half + .f_mode (1'b0), .output_select (output_select_i), .saturate_enable (saturate_enable_i), .shift_right (shift_right_i), diff --git a/techlibs/quicklogic/qlf_k6n10f/dsp_map.v b/techlibs/quicklogic/qlf_k6n10f/dspv1_map.v similarity index 100% rename from techlibs/quicklogic/qlf_k6n10f/dsp_map.v rename to techlibs/quicklogic/qlf_k6n10f/dspv1_map.v diff --git a/techlibs/quicklogic/qlf_k6n10f/dsp_sim.v b/techlibs/quicklogic/qlf_k6n10f/dspv1_sim.v similarity index 100% rename from techlibs/quicklogic/qlf_k6n10f/dsp_sim.v rename to techlibs/quicklogic/qlf_k6n10f/dspv1_sim.v diff --git a/techlibs/quicklogic/qlf_k6n10f/dspv1_sim_extra.v b/techlibs/quicklogic/qlf_k6n10f/dspv1_sim_extra.v new file mode 100644 
index 000000000..d888ac08a --- /dev/null +++ b/techlibs/quicklogic/qlf_k6n10f/dspv1_sim_extra.v @@ -0,0 +1,80 @@ +// Copyright 2020-2022 F4PGA Authors +// +// Licensed under the Apache License, Version 2.0 (the "License"); +// you may not use this file except in compliance with the License. +// You may obtain a copy of the License at +// +// http://www.apache.org/licenses/LICENSE-2.0 +// +// Unless required by applicable law or agreed to in writing, software +// distributed under the License is distributed on an "AS IS" BASIS, +// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +// See the License for the specific language governing permissions and +// limitations under the License. +// +// SPDX-License-Identifier: Apache-2.0 + +`timescale 1ps/1ps + +`default_nettype none + +// dsp_t1_20x18x64_cfg_ports but with input wire f_mode_i +// This is a yosys-specific extension beyond the vendor-provided model +module dsp_t1_20x18x64_cfg_ports_fracturable ( + input wire [19:0] a_i, + input wire [17:0] b_i, + input wire [ 5:0] acc_fir_i, + output wire [37:0] z_o, + output wire [17:0] dly_b_o, + + (* clkbuf_sink *) + input wire clock_i, + input wire reset_i, + + input wire [ 2:0] feedback_i, + input wire load_acc_i, + input wire unsigned_a_i, + input wire unsigned_b_i, + + input wire [ 2:0] output_select_i, + input wire saturate_enable_i, + input wire [ 5:0] shift_right_i, + input wire round_i, + input wire subtract_i, + input wire register_inputs_i, + input wire f_mode_i +); + + parameter [19:0] COEFF_0 = 20'd0; + parameter [19:0] COEFF_1 = 20'd0; + parameter [19:0] COEFF_2 = 20'd0; + parameter [19:0] COEFF_3 = 20'd0; + + QL_DSP2 #( + .MODE_BITS({COEFF_3, COEFF_2, COEFF_1, COEFF_0}) + ) dsp ( + .a(a_i), + .b(b_i), + .z(z_o), + .dly_b(dly_b_o), + + .f_mode(f_mode_i), // fractured mode is selected by the f_mode_i port + + .acc_fir(acc_fir_i), + .feedback(feedback_i), + .load_acc(load_acc_i), + + .unsigned_a(unsigned_a_i), + .unsigned_b(unsigned_b_i), + + .clk(clock_i), + 
.reset(reset_i), + + .saturate_enable(saturate_enable_i), + .output_select(output_select_i), + .round(round_i), + .shift_right(shift_right_i), + .subtract(subtract_i), + .register_inputs(register_inputs_i) + ); +endmodule diff --git a/techlibs/quicklogic/qlf_k6n10f/dspv2_final_map.v b/techlibs/quicklogic/qlf_k6n10f/dspv2_final_map.v new file mode 100644 index 000000000..d951c6f87 --- /dev/null +++ b/techlibs/quicklogic/qlf_k6n10f/dspv2_final_map.v @@ -0,0 +1,142 @@ +// Derived from dspv2_sim.v + +module dspv2_32x18x64_cfg_ports ( + input wire [31:0] a_i, + input wire [17:0] b_i, + input wire [17:0] c_i, + output wire [49:0] z_o, + + input wire clock_i, + input wire reset_i, + input wire acc_reset_i, + + input wire [ 2:0] feedback_i, + input wire load_acc_i, + input wire [ 2:0] output_select_i, + + input wire [31:0] a_cin_i, + input wire [17:0] b_cin_i, + input wire [49:0] z_cin_i, + + output wire [31:0] a_cout_o, + output wire [17:0] b_cout_o, + output wire [49:0] z_cout_o + +); + + parameter [31:0] COEFF_0 = 32'h0; + parameter [5:0] ACC_FIR = 6'h0; + parameter [2:0] ROUND = 3'h0; + parameter [4:0] ZC_SHIFT = 5'h0; + parameter [4:0] ZREG_SHIFT = 5'h0; + parameter [5:0] SHIFT_REG = 6'h0; + parameter SATURATE = 1'b0; + parameter SUBTRACT = 1'b0; + parameter PRE_ADD = 1'b0; + parameter A_SEL = 1'b0; + parameter A_REG = 1'b0; + parameter B_SEL = 1'b0; + parameter B_REG = 1'b0; + parameter C_REG = 1'b0; + parameter BC_REG = 1'b0; + parameter M_REG = 1'b0; + parameter ZCIN_REG = 1'b0; + parameter FRAC_MODE = 1'b0; // 32x18x64 DSP + + (* is_inferred *) + QL_DSPV2 #( + .MODE_BITS({FRAC_MODE,3'b000,ZCIN_REG,M_REG,BC_REG,C_REG,B_REG,B_SEL,A_REG,A_SEL,PRE_ADD,SUBTRACT,SATURATE,SHIFT_REG,ZREG_SHIFT,ZC_SHIFT,ROUND,ACC_FIR,COEFF_0}) + ) _TECHMAP_REPLACE_ ( + .a(a_i), + .b(b_i), + .c(c_i), + .load_acc(load_acc_i), + .feedback(feedback_i), + .output_select(output_select_i), + .z(z_o), + + .clk(clock_i), + .reset(reset_i), + .acc_reset(acc_reset_i), + + .a_cin(a_cin_i), + 
.b_cin(b_cin_i), + .z_cin(z_cin_i), + + .z_cout(z_cout_o), + .a_cout(a_cout_o), + .b_cout(b_cout_o) + ); + +endmodule + +module dspv2_16x9x32_cfg_ports ( + input wire [15:0] a_i, + input wire [8:0] b_i, + input wire [8:0] c_i, + output wire [24:0] z_o, + + input wire clock_i, + input wire reset_i, + input wire acc_reset_i, + + input wire [ 2:0] feedback_i, + input wire load_acc_i, + input wire [ 2:0] output_select_i, + + input wire [15:0] a_cin_i, + input wire [8:0] b_cin_i, + input wire [24:0] z_cin_i, + + output wire [15:0] a_cout_o, + output wire [8:0] b_cout_o, + output wire [24:0] z_cout_o + +); + + parameter [15:0] COEFF_0 = 16'h0; + parameter [5:0] ACC_FIR = 6'h0; + parameter [2:0] ROUND = 3'h0; + parameter [4:0] ZC_SHIFT = 5'h0; + parameter [4:0] ZREG_SHIFT = 5'h0; + parameter [5:0] SHIFT_REG = 6'h0; + parameter SATURATE = 1'b0; + parameter SUBTRACT = 1'b0; + parameter PRE_ADD = 1'b0; + parameter A_SEL = 1'b0; + parameter A_REG = 1'b0; + parameter B_SEL = 1'b0; + parameter B_REG = 1'b0; + parameter C_REG = 1'b0; + parameter BC_REG = 1'b0; + parameter M_REG = 1'b0; + parameter ZCIN_REG = 1'b0; + parameter FRAC_MODE = 1'b1; // 16x9x32 DSP + + (* is_inferred *) + QL_DSPV2 #( + .MODE_BITS({FRAC_MODE,3'b000,ZCIN_REG,M_REG,BC_REG,C_REG,B_REG,B_SEL,A_REG,A_SEL,PRE_ADD,SUBTRACT,SATURATE,SHIFT_REG,ZREG_SHIFT,ZC_SHIFT,ROUND,ACC_FIR,16'h0,COEFF_0}) + ) _TECHMAP_REPLACE_ ( + .a(a_i), + .b(b_i), + .c(c_i), + .load_acc(load_acc_i), + .feedback(feedback_i), + .output_select(output_select_i), + .z(z_o), + + .clk(clock_i), + .reset(reset_i), + .acc_reset(acc_reset_i), + + .a_cin(a_cin_i), + .b_cin(b_cin_i), + .z_cin(z_cin_i), + + .z_cout(z_cout_o), + .a_cout(a_cout_o), + .b_cout(b_cout_o) + ); + +endmodule + diff --git a/techlibs/quicklogic/qlf_k6n10f/dspv2_map.v b/techlibs/quicklogic/qlf_k6n10f/dspv2_map.v new file mode 100644 index 000000000..8640edb12 --- /dev/null +++ b/techlibs/quicklogic/qlf_k6n10f/dspv2_map.v @@ -0,0 +1,59 @@ +module \$__MUL32X18 (input [31:0] A, 
input [17:0] B, output [49:0] Y); + parameter A_SIGNED = 0; + parameter B_SIGNED = 0; + parameter A_WIDTH = 32; + parameter B_WIDTH = 18; + parameter Y_WIDTH = 50; + + dspv2_32x18x64_cfg_ports _TECHMAP_REPLACE_ ( + .a_i(A), + .b_i(B), + .c_i(18'd0), + .z_o(Y), + + .clock_i(1'bx), + .reset_i(1'bx), + .acc_reset_i(1'b0), + .feedback_i(3'd0), + .load_acc_i(1'b0), + .output_select_i(3'd0), + .a_cin_i(32'dx), + .b_cin_i(18'dx), + .z_cin_i(50'dx) +/* TODO: connect to dummy wires? + .a_cout_o(), + .b_cout_o(), + .z_cout_o(), +*/ + ); +endmodule + +module \$__MUL16X9 (input [15:0] A, input [8:0] B, output [24:0] Y); + parameter A_SIGNED = 0; + parameter B_SIGNED = 0; + parameter A_WIDTH = 16; + parameter B_WIDTH = 9; + parameter Y_WIDTH = 25; + + dspv2_16x9x32_cfg_ports _TECHMAP_REPLACE_ ( + .a_i(A), + .b_i(B), + .c_i(9'd0), + .z_o(Y), + + .clock_i(1'bx), + .reset_i(1'bx), + .acc_reset_i(1'b0), + .feedback_i(3'd0), + .load_acc_i(1'b0), + .output_select_i(3'd0), + .a_cin_i(16'dx), // cascade inputs of the half-block are 16/9/25 bits wide + .b_cin_i(9'dx), + .z_cin_i(25'dx) +/* TODO: connect to dummy wires? 
+ .a_cout_o(), + .b_cout_o(), + .z_cout_o(), +*/ + ); +endmodule diff --git a/techlibs/quicklogic/qlf_k6n10f/dspv2_sim.v b/techlibs/quicklogic/qlf_k6n10f/dspv2_sim.v new file mode 100644 index 000000000..fd85c70ff --- /dev/null +++ b/techlibs/quicklogic/qlf_k6n10f/dspv2_sim.v @@ -0,0 +1,1437 @@ +`timescale 1ps/1ps + +`default_nettype none + +module QL_DSPV2 ( + input wire [31:0] a, + input wire [17:0] b, + input wire [17:0] c, + input wire load_acc, + input wire [2:0] feedback, + input wire [2:0] output_select, + output wire [49:0] z, + + (* clkbuf_sink *) + input wire clk, + input wire reset, + input wire acc_reset, + + input wire [31:0] a_cin, + input wire [17:0] b_cin, + input wire [49:0] z_cin, + output wire [49:0] z_cout, + output wire [31:0] a_cout, + output wire [17:0] b_cout +); + + parameter [71:0] MODE_BITS = 72'h000000000000000000; + + localparam [31:0] COEFF_0 = MODE_BITS[31:0]; + localparam [5:0] ACC_FIR = MODE_BITS[37:32]; + localparam [2:0] ROUND = MODE_BITS[40:38]; + localparam [4:0] ZC_SHIFT = MODE_BITS[45:41]; + localparam [4:0] ZREG_SHIFT= MODE_BITS[50:46]; + localparam [5:0] SHIFT_REG = MODE_BITS[56:51]; + localparam SATURATE = MODE_BITS[57]; + localparam SUBTRACT = MODE_BITS[58]; + localparam PRE_ADD = MODE_BITS[59]; + localparam A_SEL = MODE_BITS[60]; + localparam A_REG = MODE_BITS[61]; + localparam B_SEL = MODE_BITS[62]; + localparam B_REG = MODE_BITS[63]; + localparam C_REG = MODE_BITS[64]; + localparam BC_REG = MODE_BITS[65]; + localparam M_REG = MODE_BITS[66]; + localparam ZCIN_REG = MODE_BITS[67]; + localparam FRAC_MODE = MODE_BITS[71]; + + localparam NBITS_ACC = 64; + localparam NBITS_A = 32; + localparam NBITS_BC = 18; + localparam NBITS_Z = 50; + + wire [NBITS_Z-1:0] dsp_full_z; + wire [(NBITS_Z/2)-1:0] dsp_frac0_z; + wire [(NBITS_Z/2)-1:0] dsp_frac1_z; + + wire [NBITS_Z-1:0] dsp_full_z_cout; + wire [(NBITS_Z/2)-1:0] dsp_frac0_z_cout; + wire [(NBITS_Z/2)-1:0] dsp_frac1_z_cout; + + wire [NBITS_A-1:0] dsp_full_a_cout; + wire 
[(NBITS_A/2)-1:0] dsp_frac0_a_cout; + wire [(NBITS_A/2)-1:0] dsp_frac1_a_cout; + + wire [NBITS_BC-1:0] dsp_full_b_cout; + wire [(NBITS_BC/2)-1:0] dsp_frac0_b_cout; + wire [(NBITS_BC/2)-1:0] dsp_frac1_b_cout; + + assign z = FRAC_MODE ? {dsp_frac1_z, dsp_frac0_z} : dsp_full_z; + assign z_cout = FRAC_MODE ? {dsp_frac1_z_cout, dsp_frac0_z_cout} : dsp_full_z_cout; + assign a_cout = FRAC_MODE ? {dsp_frac1_a_cout, dsp_frac0_a_cout} : dsp_full_a_cout; + assign b_cout = FRAC_MODE ? {dsp_frac1_b_cout, dsp_frac0_b_cout} : dsp_full_b_cout; + + // Output used when fmode == 1 + dspv2_sim_cfg_ports #( + .NBITS_A(NBITS_A/2), + .NBITS_BC(NBITS_BC/2), + .NBITS_ACC(NBITS_ACC/2), + .NBITS_Z(NBITS_Z/2) + ) dsp_frac0 ( + // active/fabric ports + .clock_i(clk), + .s_reset(reset), + .a_i(a[(NBITS_A/2)-1:0]), + .b_i(b[(NBITS_BC/2)-1:0]), + .c_i(c[(NBITS_BC/2)-1:0]), + .feedback_i(feedback), + .output_select_i(output_select), + .load_acc_i(load_acc), + .rst_acc_i(acc_reset), + .z_o(dsp_frac0_z), + // cascade ports (connect to dedicated cascade routing) + .a_cin_i(a_cin[(NBITS_A/2)-1:0]), + .b_cin_i(b_cin[(NBITS_BC/2)-1:0]), + .z_cin_i(z_cin[(NBITS_Z/2)-1:0]), + .z_cout_o(dsp_frac0_z_cout), + .a_cout_o(dsp_frac0_a_cout), + .b_cout_o(dsp_frac0_b_cout), + // configuration ports (tie-offs) + .coeff_i(COEFF_0[(NBITS_A/2)-1:0]), + .acc_fir_i(ACC_FIR), + .round_i(ROUND), + .zc_shift_i(ZC_SHIFT), + .zreg_shift_i(ZREG_SHIFT), + .shift_right_i(SHIFT_REG), + .saturate_enable_i(SATURATE), + .subtract_i(SUBTRACT), + .pre_add_sel_i(PRE_ADD), + .a_sel_i(A_SEL), + .a_reg_i(A_REG), + .b_sel_i(B_SEL), + .b_reg_i(B_REG), + .c_reg_i(C_REG), + .bc_reg_i(BC_REG), + .m_reg_i(M_REG), + .zcin_sel_i(ZCIN_REG) + ); + + // Output used when fmode == 1 + dspv2_sim_cfg_ports #( + .NBITS_A(NBITS_A/2), + .NBITS_BC(NBITS_BC/2), + .NBITS_ACC(NBITS_ACC/2), + .NBITS_Z(NBITS_Z/2) + ) dsp_frac1 ( + // active/fabric ports + .clock_i(clk), + .s_reset(reset), + .a_i(a[NBITS_A-1:NBITS_A/2]), + .b_i(b[NBITS_BC-1:NBITS_BC/2]), + 
.c_i(c[NBITS_BC-1:NBITS_BC/2]), + .feedback_i(feedback), + .output_select_i(output_select), + .load_acc_i(load_acc), + .rst_acc_i(acc_reset), + .z_o(dsp_frac1_z), + // cascade ports (connect to dedicated cascade routing) + .a_cin_i(a_cin[NBITS_A-1:NBITS_A/2]), + .b_cin_i(b_cin[NBITS_BC-1:NBITS_BC/2]), + .z_cin_i(z_cin[NBITS_Z-1:NBITS_Z/2]), + .z_cout_o(dsp_frac1_z_cout), + .a_cout_o(dsp_frac1_a_cout), + .b_cout_o(dsp_frac1_b_cout), + // configuration ports (tie-offs) + .coeff_i(COEFF_0[NBITS_A-1:NBITS_A/2]), + .acc_fir_i(ACC_FIR), + .round_i(ROUND), + .zc_shift_i(ZC_SHIFT), + .zreg_shift_i(ZREG_SHIFT), + .shift_right_i(SHIFT_REG), + .saturate_enable_i(SATURATE), + .subtract_i(SUBTRACT), + .pre_add_sel_i(PRE_ADD), + .a_sel_i(A_SEL), + .a_reg_i(A_REG), + .b_sel_i(B_SEL), + .b_reg_i(B_REG), + .c_reg_i(C_REG), + .bc_reg_i(BC_REG), + .m_reg_i(M_REG), + .zcin_sel_i(ZCIN_REG) + ); + + // Output used when fmode == 0 + dspv2_sim_cfg_ports #( + .NBITS_A(NBITS_A), + .NBITS_BC(NBITS_BC), + .NBITS_ACC(NBITS_ACC), + .NBITS_Z(NBITS_Z) + ) dsp_full ( + // active/fabric ports + .clock_i(clk), + .s_reset(reset), + .a_i(a), + .b_i(b), + .c_i(c), + .feedback_i(feedback), + .output_select_i(output_select), + .load_acc_i(load_acc), + .rst_acc_i(acc_reset), + .z_o(dsp_full_z), + // cascade ports (connect to dedicated cascade routing) + .a_cin_i(a_cin), + .b_cin_i(b_cin), + .z_cin_i(z_cin), + .z_cout_o(dsp_full_z_cout), + .a_cout_o(dsp_full_a_cout), + .b_cout_o(dsp_full_b_cout), + // configuration ports (tie-offs) + .coeff_i(COEFF_0), + .acc_fir_i(ACC_FIR), + .round_i(ROUND), + .zc_shift_i(ZC_SHIFT), + .zreg_shift_i(ZREG_SHIFT), + .shift_right_i(SHIFT_REG), + .saturate_enable_i(SATURATE), + .subtract_i(SUBTRACT), + .pre_add_sel_i(PRE_ADD), + .a_sel_i(A_SEL), + .a_reg_i(A_REG), + .b_sel_i(B_SEL), + .b_reg_i(B_REG), + .c_reg_i(C_REG), + .bc_reg_i(BC_REG), + .m_reg_i(M_REG), + .zcin_sel_i(ZCIN_REG) + ); + +endmodule + +module QL_DSPV2_MULT ( + input wire [31:0] a, + input wire [17:0] b, + 
output wire [49:0] z, + + input wire [2:0] feedback, + input wire [2:0] output_select, + + output wire [49:0] z_cout +); + + parameter [71:0] MODE_BITS = 72'h000000000000000000; + + localparam [31:0] COEFF_0 = MODE_BITS[31:0]; + localparam [5:0] ACC_FIR = MODE_BITS[37:32]; + localparam [2:0] ROUND = MODE_BITS[40:38]; + localparam [4:0] ZC_SHIFT = MODE_BITS[45:41]; + localparam [4:0] ZREG_SHIFT= MODE_BITS[50:46]; + localparam [5:0] SHIFT_REG = MODE_BITS[56:51]; + localparam SATURATE = MODE_BITS[57]; + localparam SUBTRACT = MODE_BITS[58]; + localparam PRE_ADD = MODE_BITS[59]; + localparam A_SEL = MODE_BITS[60]; + localparam A_REG = MODE_BITS[61]; + localparam B_SEL = MODE_BITS[62]; + localparam B_REG = MODE_BITS[63]; + localparam C_REG = MODE_BITS[64]; + localparam BC_REG = MODE_BITS[65]; + localparam M_REG = MODE_BITS[66]; + localparam ZCIN_REG = MODE_BITS[67]; + localparam FRAC_MODE = MODE_BITS[71]; + + QL_DSPV2 #( + .MODE_BITS({FRAC_MODE,3'b000,ZCIN_REG,M_REG,BC_REG,C_REG,B_REG,B_SEL,A_REG,A_SEL,PRE_ADD,SUBTRACT,SATURATE,SHIFT_REG,ZREG_SHIFT,ZC_SHIFT,ROUND,ACC_FIR,COEFF_0}) + ) dsp ( + .a(a), + .b(b), + .c(18'h0), + .load_acc(1'b0), + .feedback(feedback), + .output_select(output_select), + .z(z), + + .clk(), + .reset(), + .acc_reset(1'b0), + + .a_cin(), + .b_cin(), + .z_cin(), + + .z_cout(z_cout), + .a_cout(), + .b_cout() + ); + +endmodule + +module QL_DSPV2_MULT_REGIN ( + input wire [31:0] a, + input wire [17:0] b, + output wire [49:0] z, + + (* clkbuf_sink *) + input wire clk, + input wire reset, + + input wire [2:0] feedback, + input wire [2:0] output_select, + + output wire [49:0] z_cout +); + + parameter [71:0] MODE_BITS = 72'h00A000000000000000; + + localparam [31:0] COEFF_0 = MODE_BITS[31:0]; + localparam [5:0] ACC_FIR = MODE_BITS[37:32]; + localparam [2:0] ROUND = MODE_BITS[40:38]; + localparam [4:0] ZC_SHIFT = MODE_BITS[45:41]; + localparam [4:0] ZREG_SHIFT= MODE_BITS[50:46]; + localparam [5:0] SHIFT_REG = MODE_BITS[56:51]; + localparam SATURATE = 
MODE_BITS[57]; + localparam SUBTRACT = MODE_BITS[58]; + localparam PRE_ADD = MODE_BITS[59]; + localparam A_SEL = MODE_BITS[60]; + localparam A_REG = MODE_BITS[61]; + localparam B_SEL = MODE_BITS[62]; + localparam B_REG = MODE_BITS[63]; + localparam C_REG = MODE_BITS[64]; + localparam BC_REG = MODE_BITS[65]; + localparam M_REG = MODE_BITS[66]; + localparam ZCIN_REG = MODE_BITS[67]; + localparam FRAC_MODE = MODE_BITS[71]; + + QL_DSPV2 #( + .MODE_BITS({FRAC_MODE,3'b000,ZCIN_REG,M_REG,BC_REG,C_REG,B_REG,B_SEL,A_REG,A_SEL,PRE_ADD,SUBTRACT,SATURATE,SHIFT_REG,ZREG_SHIFT,ZC_SHIFT,ROUND,ACC_FIR,COEFF_0}) + ) dsp ( + .a(a), + .b(b), + .c(18'h0), + .load_acc(1'b0), + .feedback(feedback), + .output_select(output_select), + .z(z), + + .clk(clk), + .reset(reset), + .acc_reset(1'b0), + + .a_cin(), + .b_cin(), + .z_cin(), + + .z_cout(z_cout), + .a_cout(), + .b_cout() + ); + +endmodule + +module QL_DSPV2_MULT_REGOUT ( + input wire [31:0] a, + input wire [17:0] b, + output wire [49:0] z, + + (* clkbuf_sink *) + input wire clk, + input wire reset, + + input wire [2:0] feedback, + input wire [2:0] output_select, + + output wire [49:0] z_cout +); + + parameter [71:0] MODE_BITS = 72'h000000000000000000; + + localparam [31:0] COEFF_0 = MODE_BITS[31:0]; + localparam [5:0] ACC_FIR = MODE_BITS[37:32]; + localparam [2:0] ROUND = MODE_BITS[40:38]; + localparam [4:0] ZC_SHIFT = MODE_BITS[45:41]; + localparam [4:0] ZREG_SHIFT= MODE_BITS[50:46]; + localparam [5:0] SHIFT_REG = MODE_BITS[56:51]; + localparam SATURATE = MODE_BITS[57]; + localparam SUBTRACT = MODE_BITS[58]; + localparam PRE_ADD = MODE_BITS[59]; + localparam A_SEL = MODE_BITS[60]; + localparam A_REG = MODE_BITS[61]; + localparam B_SEL = MODE_BITS[62]; + localparam B_REG = MODE_BITS[63]; + localparam C_REG = MODE_BITS[64]; + localparam BC_REG = MODE_BITS[65]; + localparam M_REG = MODE_BITS[66]; + localparam ZCIN_REG = MODE_BITS[67]; + localparam FRAC_MODE = MODE_BITS[71]; + + QL_DSPV2 #( + 
.MODE_BITS({FRAC_MODE,3'b000,ZCIN_REG,M_REG,BC_REG,C_REG,B_REG,B_SEL,A_REG,A_SEL,PRE_ADD,SUBTRACT,SATURATE,SHIFT_REG,ZREG_SHIFT,ZC_SHIFT,ROUND,ACC_FIR,COEFF_0}) + ) dsp ( + .a(a), + .b(b), + .c(18'h0), + .load_acc(1'b0), + .feedback(feedback), + .output_select(output_select), + .z(z), + + .clk(clk), + .reset(reset), + .acc_reset(1'b0), + + .a_cin(), + .b_cin(), + .z_cin(), + + .z_cout(z_cout), + .a_cout(), + .b_cout() + ); + +endmodule + +module QL_DSPV2_MULT_REGIN_REGOUT ( + input wire [31:0] a, + input wire [17:0] b, + output wire [49:0] z, + + (* clkbuf_sink *) + input wire clk, + input wire reset, + + input wire [2:0] feedback, + input wire [2:0] output_select, + + output wire [49:0] z_cout +); + + parameter [71:0] MODE_BITS = 72'h00A000000000000000; + + localparam [31:0] COEFF_0 = MODE_BITS[31:0]; + localparam [5:0] ACC_FIR = MODE_BITS[37:32]; + localparam [2:0] ROUND = MODE_BITS[40:38]; + localparam [4:0] ZC_SHIFT = MODE_BITS[45:41]; + localparam [4:0] ZREG_SHIFT= MODE_BITS[50:46]; + localparam [5:0] SHIFT_REG = MODE_BITS[56:51]; + localparam SATURATE = MODE_BITS[57]; + localparam SUBTRACT = MODE_BITS[58]; + localparam PRE_ADD = MODE_BITS[59]; + localparam A_SEL = MODE_BITS[60]; + localparam A_REG = MODE_BITS[61]; + localparam B_SEL = MODE_BITS[62]; + localparam B_REG = MODE_BITS[63]; + localparam C_REG = MODE_BITS[64]; + localparam BC_REG = MODE_BITS[65]; + localparam M_REG = MODE_BITS[66]; + localparam ZCIN_REG = MODE_BITS[67]; + localparam FRAC_MODE = MODE_BITS[71]; + + QL_DSPV2 #( + .MODE_BITS({FRAC_MODE,3'b000,ZCIN_REG,M_REG,BC_REG,C_REG,B_REG,B_SEL,A_REG,A_SEL,PRE_ADD,SUBTRACT,SATURATE,SHIFT_REG,ZREG_SHIFT,ZC_SHIFT,ROUND,ACC_FIR,COEFF_0}) + ) dsp ( + .a(a), + .b(b), + .c(18'h0), + .load_acc(1'b0), + .feedback(feedback), + .output_select(output_select), + .z(z), + + .clk(clk), + .reset(reset), + .acc_reset(1'b0), + + .a_cin(), + .b_cin(), + .z_cin(), + + .z_cout(z_cout), + .a_cout(), + .b_cout() + ); + +endmodule + +module QL_DSPV2_MULTADD ( + input wire 
[31:0] a, + input wire [17:0] b, + output wire [49:0] z, + + (* clkbuf_sink *) + input wire clk, + input wire reset, + input wire acc_reset, + input wire load_acc, + + input wire [49:0] z_cin, + output wire [49:0] z_cout, + + input wire [ 2:0] feedback, + input wire [ 2:0] output_select +); + + parameter [71:0] MODE_BITS = 72'h000000000000000000; + + localparam [31:0] COEFF_0 = MODE_BITS[31:0]; + localparam [5:0] ACC_FIR = MODE_BITS[37:32]; + localparam [2:0] ROUND = MODE_BITS[40:38]; + localparam [4:0] ZC_SHIFT = MODE_BITS[45:41]; + localparam [4:0] ZREG_SHIFT= MODE_BITS[50:46]; + localparam [5:0] SHIFT_REG = MODE_BITS[56:51]; + localparam SATURATE = MODE_BITS[57]; + localparam SUBTRACT = MODE_BITS[58]; + localparam PRE_ADD = MODE_BITS[59]; + localparam A_SEL = MODE_BITS[60]; + localparam A_REG = MODE_BITS[61]; + localparam B_SEL = MODE_BITS[62]; + localparam B_REG = MODE_BITS[63]; + localparam C_REG = MODE_BITS[64]; + localparam BC_REG = MODE_BITS[65]; + localparam M_REG = MODE_BITS[66]; + localparam ZCIN_REG = MODE_BITS[67]; + localparam FRAC_MODE = MODE_BITS[71]; + + QL_DSPV2 #( + .MODE_BITS({FRAC_MODE,3'b000,ZCIN_REG,M_REG,BC_REG,C_REG,B_REG,B_SEL,A_REG,A_SEL,PRE_ADD,SUBTRACT,SATURATE,SHIFT_REG,ZREG_SHIFT,ZC_SHIFT,ROUND,ACC_FIR,COEFF_0}) + ) dsp ( + .a(a), + .b(b), + .c(18'h0), + .load_acc(load_acc), + .feedback(feedback), + .output_select(output_select), + .z(z), + + .clk(clk), + .reset(reset), + .acc_reset(acc_reset), + + .a_cin(), + .b_cin(), + .z_cin(z_cin), + + .z_cout(z_cout), + .a_cout(), + .b_cout() + ); + +endmodule + +module QL_DSPV2_MULTADD_REGIN ( + input wire [31:0] a, + input wire [17:0] b, + output wire [49:0] z, + + (* clkbuf_sink *) + input wire clk, + input wire reset, + input wire acc_reset, + input wire load_acc, + + input wire [49:0] z_cin, + output wire [49:0] z_cout, + + input wire [ 2:0] feedback, + input wire [ 2:0] output_select +); + + parameter [71:0] MODE_BITS = 72'h00A000000000000000; + + localparam [31:0] COEFF_0 = 
MODE_BITS[31:0]; + localparam [5:0] ACC_FIR = MODE_BITS[37:32]; + localparam [2:0] ROUND = MODE_BITS[40:38]; + localparam [4:0] ZC_SHIFT = MODE_BITS[45:41]; + localparam [4:0] ZREG_SHIFT= MODE_BITS[50:46]; + localparam [5:0] SHIFT_REG = MODE_BITS[56:51]; + localparam SATURATE = MODE_BITS[57]; + localparam SUBTRACT = MODE_BITS[58]; + localparam PRE_ADD = MODE_BITS[59]; + localparam A_SEL = MODE_BITS[60]; + localparam A_REG = MODE_BITS[61]; + localparam B_SEL = MODE_BITS[62]; + localparam B_REG = MODE_BITS[63]; + localparam C_REG = MODE_BITS[64]; + localparam BC_REG = MODE_BITS[65]; + localparam M_REG = MODE_BITS[66]; + localparam ZCIN_REG = MODE_BITS[67]; + localparam FRAC_MODE = MODE_BITS[71]; + + QL_DSPV2 #( + .MODE_BITS({FRAC_MODE,3'b000,ZCIN_REG,M_REG,BC_REG,C_REG,B_REG,B_SEL,A_REG,A_SEL,PRE_ADD,SUBTRACT,SATURATE,SHIFT_REG,ZREG_SHIFT,ZC_SHIFT,ROUND,ACC_FIR,COEFF_0}) + ) dsp ( + .a(a), + .b(b), + .c(18'h0), + .load_acc(load_acc), + .feedback(feedback), + .output_select(output_select), + .z(z), + + .clk(clk), + .reset(reset), + .acc_reset(acc_reset), + + .a_cin(), + .b_cin(), + .z_cin(z_cin), + + .z_cout(z_cout), + .a_cout(), + .b_cout() + ); + +endmodule + +module QL_DSPV2_MULTADD_REGOUT ( + input wire [31:0] a, + input wire [17:0] b, + output wire [49:0] z, + + (* clkbuf_sink *) + input wire clk, + input wire reset, + input wire acc_reset, + input wire load_acc, + + input wire [49:0] z_cin, + output wire [49:0] z_cout, + + input wire [ 2:0] feedback, + input wire [ 2:0] output_select +); + + parameter [71:0] MODE_BITS = 72'h000000000000000000; + + localparam [31:0] COEFF_0 = MODE_BITS[31:0]; + localparam [5:0] ACC_FIR = MODE_BITS[37:32]; + localparam [2:0] ROUND = MODE_BITS[40:38]; + localparam [4:0] ZC_SHIFT = MODE_BITS[45:41]; + localparam [4:0] ZREG_SHIFT= MODE_BITS[50:46]; + localparam [5:0] SHIFT_REG = MODE_BITS[56:51]; + localparam SATURATE = MODE_BITS[57]; + localparam SUBTRACT = MODE_BITS[58]; + localparam PRE_ADD = MODE_BITS[59]; + localparam A_SEL = 
MODE_BITS[60]; + localparam A_REG = MODE_BITS[61]; + localparam B_SEL = MODE_BITS[62]; + localparam B_REG = MODE_BITS[63]; + localparam C_REG = MODE_BITS[64]; + localparam BC_REG = MODE_BITS[65]; + localparam M_REG = MODE_BITS[66]; + localparam ZCIN_REG = MODE_BITS[67]; + localparam FRAC_MODE = MODE_BITS[71]; + + QL_DSPV2 #( + .MODE_BITS({FRAC_MODE,3'b000,ZCIN_REG,M_REG,BC_REG,C_REG,B_REG,B_SEL,A_REG,A_SEL,PRE_ADD,SUBTRACT,SATURATE,SHIFT_REG,ZREG_SHIFT,ZC_SHIFT,ROUND,ACC_FIR,COEFF_0}) + ) dsp ( + .a(a), + .b(b), + .c(18'h0), + .load_acc(load_acc), + .feedback(feedback), + .output_select(output_select), + .z(z), + + .clk(clk), + .reset(reset), + .acc_reset(acc_reset), + + .a_cin(), + .b_cin(), + .z_cin(z_cin), + + .z_cout(z_cout), + .a_cout(), + .b_cout() + ); + +endmodule + +module QL_DSPV2_MULTADD_REGIN_REGOUT ( + input wire [31:0] a, + input wire [17:0] b, + output wire [49:0] z, + + (* clkbuf_sink *) + input wire clk, + input wire reset, + input wire acc_reset, + input wire load_acc, + + input wire [49:0] z_cin, + output wire [49:0] z_cout, + + input wire [ 2:0] feedback, + input wire [ 2:0] output_select +); + + parameter [71:0] MODE_BITS = 72'h00A000000000000000; + + localparam [31:0] COEFF_0 = MODE_BITS[31:0]; + localparam [5:0] ACC_FIR = MODE_BITS[37:32]; + localparam [2:0] ROUND = MODE_BITS[40:38]; + localparam [4:0] ZC_SHIFT = MODE_BITS[45:41]; + localparam [4:0] ZREG_SHIFT= MODE_BITS[50:46]; + localparam [5:0] SHIFT_REG = MODE_BITS[56:51]; + localparam SATURATE = MODE_BITS[57]; + localparam SUBTRACT = MODE_BITS[58]; + localparam PRE_ADD = MODE_BITS[59]; + localparam A_SEL = MODE_BITS[60]; + localparam A_REG = MODE_BITS[61]; + localparam B_SEL = MODE_BITS[62]; + localparam B_REG = MODE_BITS[63]; + localparam C_REG = MODE_BITS[64]; + localparam BC_REG = MODE_BITS[65]; + localparam M_REG = MODE_BITS[66]; + localparam ZCIN_REG = MODE_BITS[67]; + localparam FRAC_MODE = MODE_BITS[71]; + + QL_DSPV2 #( + 
.MODE_BITS({FRAC_MODE,3'b000,ZCIN_REG,M_REG,BC_REG,C_REG,B_REG,B_SEL,A_REG,A_SEL,PRE_ADD,SUBTRACT,SATURATE,SHIFT_REG,ZREG_SHIFT,ZC_SHIFT,ROUND,ACC_FIR,COEFF_0}) + ) dsp ( + .a(a), + .b(b), + .c(18'h0), + .load_acc(load_acc), + .feedback(feedback), + .output_select(output_select), + .z(z), + + .clk(clk), + .reset(reset), + .acc_reset(acc_reset), + + .a_cin(), + .b_cin(), + .z_cin(z_cin), + + .z_cout(z_cout), + .a_cout(), + .b_cout() + ); + +endmodule + +module QL_DSPV2_MULTACC ( + input wire [31:0] a, + input wire [17:0] b, + output wire [49:0] z, + + (* clkbuf_sink *) + input wire clk, + input wire reset, + input wire acc_reset, + input wire load_acc, + input wire [ 2:0] feedback, + input wire [ 2:0] output_select, + + output wire [49:0] z_cout +); + + parameter [71:0] MODE_BITS = 72'h000000000000000000; + + localparam [31:0] COEFF_0 = MODE_BITS[31:0]; + localparam [5:0] ACC_FIR = MODE_BITS[37:32]; + localparam [2:0] ROUND = MODE_BITS[40:38]; + localparam [4:0] ZC_SHIFT = MODE_BITS[45:41]; + localparam [4:0] ZREG_SHIFT= MODE_BITS[50:46]; + localparam [5:0] SHIFT_REG = MODE_BITS[56:51]; + localparam SATURATE = MODE_BITS[57]; + localparam SUBTRACT = MODE_BITS[58]; + localparam PRE_ADD = MODE_BITS[59]; + localparam A_SEL = MODE_BITS[60]; + localparam A_REG = MODE_BITS[61]; + localparam B_SEL = MODE_BITS[62]; + localparam B_REG = MODE_BITS[63]; + localparam C_REG = MODE_BITS[64]; + localparam BC_REG = MODE_BITS[65]; + localparam M_REG = MODE_BITS[66]; + localparam ZCIN_REG = MODE_BITS[67]; + localparam FRAC_MODE = MODE_BITS[71]; + + QL_DSPV2 #( + .MODE_BITS({FRAC_MODE,3'b000,ZCIN_REG,M_REG,BC_REG,C_REG,B_REG,B_SEL,A_REG,A_SEL,PRE_ADD,SUBTRACT,SATURATE,SHIFT_REG,ZREG_SHIFT,ZC_SHIFT,ROUND,ACC_FIR,COEFF_0}) + ) dsp ( + .a(a), + .b(b), + .c(18'h0), + .load_acc(load_acc), + .feedback(feedback), + .output_select(output_select), + .z(z), + + .clk(clk), + .reset(reset), + .acc_reset(acc_reset), + + .a_cin(), + .b_cin(), + .z_cin(), + + .z_cout(z_cout), + .a_cout(), + .b_cout() 
+ ); + +endmodule + +module QL_DSPV2_MULTACC_REGIN ( + input wire [31:0] a, + input wire [17:0] b, + output wire [49:0] z, + + (* clkbuf_sink *) + input wire clk, + input wire reset, + input wire acc_reset, + input wire load_acc, + input wire [ 2:0] feedback, + input wire [ 2:0] output_select, + + output wire [49:0] z_cout +); + + parameter [71:0] MODE_BITS = 72'h004000000000000000; + + localparam [31:0] COEFF_0 = MODE_BITS[31:0]; + localparam [5:0] ACC_FIR = MODE_BITS[37:32]; + localparam [2:0] ROUND = MODE_BITS[40:38]; + localparam [4:0] ZC_SHIFT = MODE_BITS[45:41]; + localparam [4:0] ZREG_SHIFT= MODE_BITS[50:46]; + localparam [5:0] SHIFT_REG = MODE_BITS[56:51]; + localparam SATURATE = MODE_BITS[57]; + localparam SUBTRACT = MODE_BITS[58]; + localparam PRE_ADD = MODE_BITS[59]; + localparam A_SEL = MODE_BITS[60]; + localparam A_REG = MODE_BITS[61]; + localparam B_SEL = MODE_BITS[62]; + localparam B_REG = MODE_BITS[63]; + localparam C_REG = MODE_BITS[64]; + localparam BC_REG = MODE_BITS[65]; + localparam M_REG = MODE_BITS[66]; + localparam ZCIN_REG = MODE_BITS[67]; + localparam FRAC_MODE = MODE_BITS[71]; + + QL_DSPV2 #( + .MODE_BITS({FRAC_MODE,3'b000,ZCIN_REG,M_REG,BC_REG,C_REG,B_REG,B_SEL,A_REG,A_SEL,PRE_ADD,SUBTRACT,SATURATE,SHIFT_REG,ZREG_SHIFT,ZC_SHIFT,ROUND,ACC_FIR,COEFF_0}) + ) dsp ( + .a(a), + .b(b), + .c(18'h0), + .load_acc(load_acc), + .feedback(feedback), + .output_select(output_select), + .z(z), + + .clk(clk), + .reset(reset), + .acc_reset(acc_reset), + + .a_cin(), + .b_cin(), + .z_cin(), + + .z_cout(z_cout), + .a_cout(), + .b_cout() + ); + +endmodule + +module QL_DSPV2_MULTACC_REGOUT ( + input wire [31:0] a, + input wire [17:0] b, + output wire [49:0] z, + + (* clkbuf_sink *) + input wire clk, + input wire reset, + input wire acc_reset, + input wire load_acc, + input wire [ 2:0] feedback, + input wire [ 2:0] output_select, + + output wire [49:0] z_cout +); + + parameter [71:0] MODE_BITS = 72'h000000000000000000; + + localparam [31:0] COEFF_0 = 
MODE_BITS[31:0]; + localparam [5:0] ACC_FIR = MODE_BITS[37:32]; + localparam [2:0] ROUND = MODE_BITS[40:38]; + localparam [4:0] ZC_SHIFT = MODE_BITS[45:41]; + localparam [4:0] ZREG_SHIFT= MODE_BITS[50:46]; + localparam [5:0] SHIFT_REG = MODE_BITS[56:51]; + localparam SATURATE = MODE_BITS[57]; + localparam SUBTRACT = MODE_BITS[58]; + localparam PRE_ADD = MODE_BITS[59]; + localparam A_SEL = MODE_BITS[60]; + localparam A_REG = MODE_BITS[61]; + localparam B_SEL = MODE_BITS[62]; + localparam B_REG = MODE_BITS[63]; + localparam C_REG = MODE_BITS[64]; + localparam BC_REG = MODE_BITS[65]; + localparam M_REG = MODE_BITS[66]; + localparam ZCIN_REG = MODE_BITS[67]; + localparam FRAC_MODE = MODE_BITS[71]; + + QL_DSPV2 #( + .MODE_BITS({FRAC_MODE,3'b000,ZCIN_REG,M_REG,BC_REG,C_REG,B_REG,B_SEL,A_REG,A_SEL,PRE_ADD,SUBTRACT,SATURATE,SHIFT_REG,ZREG_SHIFT,ZC_SHIFT,ROUND,ACC_FIR,COEFF_0}) + ) dsp ( + .a(a), + .b(b), + .c(18'h0), + .load_acc(load_acc), + .feedback(feedback), + .output_select(output_select), + .z(z), + + .clk(clk), + .reset(reset), + .acc_reset(acc_reset), + + .a_cin(), + .b_cin(), + .z_cin(), + + .z_cout(z_cout), + .a_cout(), + .b_cout() + ); + +endmodule + +module QL_DSPV2_MULTACC_REGIN_REGOUT ( + input wire [31:0] a, + input wire [17:0] b, + output wire [49:0] z, + + (* clkbuf_sink *) + input wire clk, + input wire reset, + input wire acc_reset, + input wire load_acc, + input wire [ 2:0] feedback, + input wire [ 2:0] output_select, + + output wire [49:0] z_cout +); + + parameter [71:0] MODE_BITS = 72'h004000000000000000; + + localparam [31:0] COEFF_0 = MODE_BITS[31:0]; + localparam [5:0] ACC_FIR = MODE_BITS[37:32]; + localparam [2:0] ROUND = MODE_BITS[40:38]; + localparam [4:0] ZC_SHIFT = MODE_BITS[45:41]; + localparam [4:0] ZREG_SHIFT= MODE_BITS[50:46]; + localparam [5:0] SHIFT_REG = MODE_BITS[56:51]; + localparam SATURATE = MODE_BITS[57]; + localparam SUBTRACT = MODE_BITS[58]; + localparam PRE_ADD = MODE_BITS[59]; + localparam A_SEL = MODE_BITS[60]; + localparam 
A_REG = MODE_BITS[61]; + localparam B_SEL = MODE_BITS[62]; + localparam B_REG = MODE_BITS[63]; + localparam C_REG = MODE_BITS[64]; + localparam BC_REG = MODE_BITS[65]; + localparam M_REG = MODE_BITS[66]; + localparam ZCIN_REG = MODE_BITS[67]; + localparam FRAC_MODE = MODE_BITS[71]; + + QL_DSPV2 #( + .MODE_BITS({FRAC_MODE,3'b000,ZCIN_REG,M_REG,BC_REG,C_REG,B_REG,B_SEL,A_REG,A_SEL,PRE_ADD,SUBTRACT,SATURATE,SHIFT_REG,ZREG_SHIFT,ZC_SHIFT,ROUND,ACC_FIR,COEFF_0}) + ) dsp ( + .a(a), + .b(b), + .c(18'h0), + .load_acc(load_acc), + .feedback(feedback), + .output_select(output_select), + .z(z), + + .clk(clk), + .reset(reset), + .acc_reset(acc_reset), + + .a_cin(), + .b_cin(), + .z_cin(), + + .z_cout(z_cout), + .a_cout(), + .b_cout() + ); + +endmodule + +module dspv2_32x18x64_cfg_ports ( + input wire [31:0] a_i, + input wire [17:0] b_i, + input wire [17:0] c_i, + output wire [49:0] z_o, + + (* clkbuf_sink *) + input wire clock_i, + input wire reset_i, + input wire acc_reset_i, + + input wire [ 2:0] feedback_i, + input wire load_acc_i, + input wire [ 2:0] output_select_i, + + input wire [31:0] a_cin_i, + input wire [17:0] b_cin_i, + input wire [49:0] z_cin_i, + + output wire [31:0] a_cout_o, + output wire [17:0] b_cout_o, + output wire [49:0] z_cout_o + +); + + parameter [31:0] COEFF_0 = 32'h0; + parameter [5:0] ACC_FIR = 6'h0; + parameter [2:0] ROUND = 3'h0; + parameter [4:0] ZC_SHIFT = 5'h0; + parameter [4:0] ZREG_SHIFT = 5'h0; + parameter [5:0] SHIFT_REG = 6'h0; + parameter SATURATE = 1'b0; + parameter SUBTRACT = 1'b0; + parameter PRE_ADD = 1'b0; + parameter A_SEL = 1'b0; + parameter A_REG = 1'b0; + parameter B_SEL = 1'b0; + parameter B_REG = 1'b0; + parameter C_REG = 1'b0; + parameter BC_REG = 1'b0; + parameter M_REG = 1'b0; + parameter ZCIN_REG = 1'b0; + parameter FRAC_MODE = 1'b0; // 32x18x64 DSP + + QL_DSPV2 #( + .MODE_BITS({FRAC_MODE,3'b000,ZCIN_REG,M_REG,BC_REG,C_REG,B_REG,B_SEL,A_REG,A_SEL,PRE_ADD,SUBTRACT,SATURATE,SHIFT_REG,ZREG_SHIFT,ZC_SHIFT,ROUND,ACC_FIR,COEFF_0}) 
+ ) dsp ( + .a(a_i), + .b(b_i), + .c(c_i), + .load_acc(load_acc_i), + .feedback(feedback_i), + .output_select(output_select_i), + .z(z_o), + + .clk(clock_i), + .reset(reset_i), + .acc_reset(acc_reset_i), + + .a_cin(a_cin_i), + .b_cin(b_cin_i), + .z_cin(z_cin_i), + + .z_cout(z_cout_o), + .a_cout(a_cout_o), + .b_cout(b_cout_o) + ); + +endmodule + +module dspv2_16x9x32_cfg_ports ( + input wire [15:0] a_i, + input wire [8:0] b_i, + input wire [8:0] c_i, + output wire [24:0] z_o, + + (* clkbuf_sink *) + input wire clock_i, + input wire reset_i, + input wire acc_reset_i, + + input wire [ 2:0] feedback_i, + input wire load_acc_i, + input wire [ 2:0] output_select_i, + + input wire [15:0] a_cin_i, + input wire [8:0] b_cin_i, + input wire [24:0] z_cin_i, + + output wire [15:0] a_cout_o, + output wire [8:0] b_cout_o, + output wire [24:0] z_cout_o + +); + + parameter [15:0] COEFF_0 = 16'h0; + parameter [5:0] ACC_FIR = 6'h0; + parameter [2:0] ROUND = 3'h0; + parameter [4:0] ZC_SHIFT = 5'h0; + parameter [4:0] ZREG_SHIFT = 5'h0; + parameter [5:0] SHIFT_REG = 6'h0; + parameter SATURATE = 1'b0; + parameter SUBTRACT = 1'b0; + parameter PRE_ADD = 1'b0; + parameter A_SEL = 1'b0; + parameter A_REG = 1'b0; + parameter B_SEL = 1'b0; + parameter B_REG = 1'b0; + parameter C_REG = 1'b0; + parameter BC_REG = 1'b0; + parameter M_REG = 1'b0; + parameter ZCIN_REG = 1'b0; + parameter FRAC_MODE = 1'b1; // 16x9x32 DSP + + QL_DSPV2 #( + .MODE_BITS({FRAC_MODE,3'b000,ZCIN_REG,M_REG,BC_REG,C_REG,B_REG,B_SEL,A_REG,A_SEL,PRE_ADD,SUBTRACT,SATURATE,SHIFT_REG,ZREG_SHIFT,ZC_SHIFT,ROUND,ACC_FIR,16'h0,COEFF_0}) + ) dsp ( + .a(a_i), + .b(b_i), + .c(c_i), + .load_acc(load_acc_i), + .feedback(feedback_i), + .output_select(output_select_i), + .z(z_o), + + .clk(clock_i), + .reset(reset_i), + .acc_reset(acc_reset_i), + + .a_cin(a_cin_i), + .b_cin(b_cin_i), + .z_cin(z_cin_i), + + .z_cout(z_cout_o), + .a_cout(a_cout_o), + .b_cout(b_cout_o) + ); + +endmodule + +module dspv2_sim_cfg_ports # ( + parameter NBITS_ACC = 
64, + parameter NBITS_A = 32, + parameter NBITS_BC = 18, + parameter NBITS_Z = 50 +)( + // active/fabric ports + input wire clock_i, + input wire s_reset, + input wire [NBITS_A-1:0] a_i, + input wire [NBITS_BC-1:0] b_i, + input wire [NBITS_BC-1:0] c_i, + input wire [2:0] feedback_i, + input wire [2:0] output_select_i, + input wire load_acc_i, + input wire rst_acc_i, + output wire [NBITS_Z-1:0] z_o, + + // cascade ports (connect to dedicated cascade routing) + input wire [NBITS_A-1:0] a_cin_i, + input wire [NBITS_BC-1:0] b_cin_i, + input wire [NBITS_Z-1:0] z_cin_i, + output wire [NBITS_Z-1:0] z_cout_o, + output wire [NBITS_A-1:0] a_cout_o, + output wire [NBITS_BC-1:0] b_cout_o, + + // configuration ports (tie-offs) + input wire [NBITS_A-1:0] coeff_i, + input wire [5:0] acc_fir_i, + input wire [2:0] round_i, + input wire [4:0] zc_shift_i, + input wire [4:0] zreg_shift_i, + input wire [5:0] shift_right_i, + input wire saturate_enable_i, + input wire subtract_i, + input wire pre_add_sel_i, + input wire a_sel_i, + input wire a_reg_i, + input wire b_sel_i, + input wire b_reg_i, + input wire c_reg_i, + input wire bc_reg_i, + input wire m_reg_i, + input wire zcin_sel_i +); + + // Input registers + reg [NBITS_A-1:0] r_a; + reg [NBITS_BC-1:0] r_b; + reg [NBITS_BC-1:0] r_c; + + reg [NBITS_ACC-1:0] acc; + + wire [NBITS_A-1:0] a_acin_dat; + wire [NBITS_BC-1:0] b_bcin_dat; + + wire [NBITS_A-1:0] a; + wire [NBITS_BC-1:0] b; + wire [NBITS_BC-1:0] c; + + wire [NBITS_BC:0] preadd_raw; + + reg [NBITS_BC-1:0] preadd_sat; + reg [NBITS_BC-1:0] preadd_sat_r; + wire [NBITS_BC-1:0] preadd; + + initial begin + r_a <= 0; + r_b <= 0; + r_c <= 0; + end + + assign a_acin_dat = (a_sel_i)? a_cin_i: a_i; + assign b_bcin_dat = (b_sel_i)? 
b_cin_i: b_i; + + always @(posedge clock_i or posedge s_reset) begin + if (s_reset) begin + r_a <= 0; + r_b <= 0; + r_c <= 0; + end else begin + r_a <= a_acin_dat; + r_b <= b_bcin_dat; + r_c <= c_i; + end + end + + // Registered / non-registered input path select + assign a = (a_reg_i) ? r_a : a_acin_dat; + assign b = (b_reg_i) ? r_b : b_bcin_dat; + assign c = (c_reg_i) ? r_c : c_i; + + assign preadd_raw = b + c; + + always @(*) begin + if (!b[(NBITS_BC-1)] && !c[(NBITS_BC-1)]) begin // pos+pos + if (preadd_raw[(NBITS_BC-1)]) begin + preadd_sat = {1'b0, {(NBITS_BC-1){1'b1}}}; // max pos # + end else begin + preadd_sat = preadd_raw[(NBITS_BC-1):0]; + end + end else begin + if (b[(NBITS_BC-1)] && c[(NBITS_BC-1)]) begin // neg+neg + if (!preadd_raw[(NBITS_BC-1)]) begin + preadd_sat = {1'b1, {(NBITS_BC-1){1'b0}}}; // max neg # + end else begin + preadd_sat = preadd_raw[(NBITS_BC-1):0]; + end + end else begin // pos+neg or neg+pos + preadd_sat = preadd_raw[(NBITS_BC-1):0]; + end + end + end + + always @(posedge clock_i or posedge s_reset) begin + if (s_reset) begin + preadd_sat_r <= 0; + end else begin + preadd_sat_r <= preadd_sat; + end + end + + assign preadd = (bc_reg_i)? 
preadd_sat_r : preadd_sat; + + + // Multiplier + wire [NBITS_A-1:0] mult_a; + wire [NBITS_BC-1:0] mult_b; + wire mult_sgn_a; + wire [NBITS_A-1:0] mult_mag_a; + wire mult_sgn_b; + wire [NBITS_BC-1:0] mult_mag_b; + + wire [NBITS_A+NBITS_BC-1:0] mult_mag; + wire mult_sgn; + wire [NBITS_A+NBITS_BC-1:0] mult; + wire [NBITS_ACC-1:0] mult_xtnd; + + reg [NBITS_ACC-1:0] mult_xtnd_r; + wire [NBITS_ACC-1:0] mult_xtnd_sel; + wire [NBITS_ACC-1:0] mult_xtnd_sub; + wire [NBITS_ACC-1:0] add_a; + wire [NBITS_ACC-1:0] add_b; + wire [NBITS_ACC-1:0] add_o; + wire [NBITS_ACC-1:0] acc_fir_int; + + wire [NBITS_ACC-1:0] acc_out; + + wire [NBITS_ACC-1:0] zcin_rshift; + wire [NBITS_ACC-1:0] zcin_xtnd; + wire [NBITS_ACC-1:0] zreg_rshift; + + // Output signals + wire [NBITS_Z-1:0] z0; + reg [NBITS_Z-1:0] z1; + wire [NBITS_Z-1:0] z2; + wire [NBITS_Z-1:0] z_cin; + + assign mult_a = (feedback_i == 3'h0) ? a : + (feedback_i == 3'h1) ? a : + (feedback_i == 3'h2) ? a : + (feedback_i == 3'h3) ? a : + (feedback_i == 3'h4) ? a : + (feedback_i == 3'h5) ? a : + (feedback_i == 3'h6) ? acc[NBITS_A-1:0]: + coeff_i; // if feedback_i == 3'h7 + + assign mult_b = (pre_add_sel_i) ? preadd : b; + + assign mult_sgn_a = mult_a[NBITS_A-1]; + assign mult_mag_a = (mult_sgn_a) ? (~mult_a + 1) : mult_a; + assign mult_sgn_b = mult_b[NBITS_BC-1]; + assign mult_mag_b = (mult_sgn_b) ? (~mult_b + 1) : mult_b; + + assign mult_mag = mult_mag_a * mult_mag_b; + assign mult_sgn = (mult_sgn_a ^ mult_sgn_b); + + assign mult = (mult_sgn)? (~mult_mag + 1) : mult_mag; + + // Sign extension + assign mult_xtnd = {{(NBITS_ACC-NBITS_A-NBITS_BC){mult[NBITS_A+NBITS_BC-1]}}, mult[NBITS_A+NBITS_BC-1:0]}; + + always @(posedge clock_i or posedge s_reset) begin + if (s_reset) begin + mult_xtnd_r <= 0; + end else begin + mult_xtnd_r <= mult_xtnd; + end + end + + assign mult_xtnd_sel = m_reg_i ? mult_xtnd_r : mult_xtnd; + + // Adder + assign mult_xtnd_sub = subtract_i ? 
(~mult_xtnd_sel + 1) : mult_xtnd_sel; + assign add_a = (feedback_i[2:0] == 2) ? {a,b} : mult_xtnd_sub; + + assign acc_fir_int = a <<< acc_fir_i; + + + assign z_cin = (zcin_sel_i)? z_cin_i : {NBITS_Z{1'b0}}; // zero of matching width when z_cin_i is deselected + assign zcin_rshift = z_cin >>> zc_shift_i; + assign zcin_xtnd = {{(NBITS_ACC-NBITS_Z){z_cin[NBITS_Z-1]}}, z_cin}; + + assign zreg_rshift = z1 >>> zreg_shift_i; + + assign add_b = (feedback_i == 3'h0) ? acc : + (feedback_i == 3'h1) ? zcin_rshift : + (feedback_i == 3'h2) ? zcin_xtnd : + (feedback_i == 3'h3) ? zcin_xtnd : + (feedback_i == 3'h4) ? z1 : + (feedback_i == 3'h5) ? zreg_rshift : + acc_fir_int; + + assign add_o = add_a + add_b; + + // Accumulator + initial acc <= 0; + + always @(posedge clock_i or posedge s_reset) + if (s_reset) + acc <= 'h0; + else begin + if (rst_acc_i) + acc <= 'h0; + else if (load_acc_i) + acc <= add_o; + else + acc <= acc; + end + + // Adder/accumulator output selection + assign acc_out = (output_select_i[1]) ? add_o : acc; + + // Round, shift, saturate + wire a_sign; + wire [NBITS_ACC-1:0] onehalf; + wire [NBITS_ACC-1:0] int_mask; + wire [NBITS_ACC-1:0] frac_mask; + wire [NBITS_ACC-1:0] a_frac; + wire [NBITS_ACC-1:0] a_int; + + reg [NBITS_ACC-1:0] acc_rnd; + wire [NBITS_ACC-1:0] acc_shr; + wire [NBITS_ACC-1:0] acc_sat_s; + wire [NBITS_ACC-1:0] acc_sat; + + assign a_sign = acc_out[(NBITS_ACC-1)]; + assign onehalf = (shift_right_i == 6'b0) ? 
{NBITS_ACC{1'b0}} : ({{(NBITS_ACC-1){1'b0}},1'b1} << (shift_right_i-1)); + assign int_mask = ({NBITS_ACC{1'b1}} << shift_right_i); + assign frac_mask = ~int_mask; + assign a_frac = acc_out & frac_mask; + assign a_int = acc_out >>> shift_right_i; + + always @(*) begin + case(round_i) + 3'b000 : // no rounding + acc_rnd = acc_out; + + 3'b001 : // round half up, asymmetrical + // add 1/2 + acc_rnd = acc_out + onehalf; + + 3'b010 : // round half up, symmetrical + // if a is neg and a_frac = 1/2, do nothing, else add 1/2 + if ((a_sign == 1'b1) && (a_frac == onehalf)) + acc_rnd = acc_out; + else + acc_rnd = acc_out + onehalf; + + 3'b011 : // round half down, symmetrical + // if a is pos and a_frac = 1/2, do nothing, else add 1/2 + if ((a_sign == 1'b0) && (a_frac == onehalf)) + acc_rnd = acc_out; + else + acc_rnd = acc_out + onehalf; + + 3'b100 : // round half even + // if a is even and a_frac = 1/2, do nothing, else add 1/2 + if ((a_int[0] == 1'b0) && (a_frac == onehalf)) + acc_rnd = acc_out; + else + acc_rnd = acc_out + onehalf; + + 3'b101 : // round half odd (own encoding; a repeated 3'b100 item would never be selected) + // if a is odd and a_frac = 1/2, do nothing, else add 1/2 + if ((a_int[0] == 1'b1) && (a_frac == onehalf)) + acc_rnd = acc_out; + else + acc_rnd = acc_out + onehalf; + + default : // no rounding + acc_rnd = acc_out; + + endcase + end + + assign acc_shr = (acc_rnd >>> shift_right_i); + + assign acc_sat_s = ((|acc_shr[NBITS_ACC-1:NBITS_Z-1] == 1'b0) || + (&acc_shr[NBITS_ACC-1:NBITS_Z-1] == 1'b1)) ? {{(NBITS_ACC-NBITS_Z){1'b0}},{acc_shr[NBITS_Z-1:0]}} : + {{(NBITS_ACC-NBITS_Z){1'b0}},{acc_shr[NBITS_ACC-1],{NBITS_Z-1{~acc_shr[NBITS_ACC-1]}}}}; + + assign acc_sat = (saturate_enable_i)? acc_sat_s : acc_shr; + + assign z0 = mult_xtnd_sel[NBITS_Z-1:0]; + assign z2 = acc_sat[NBITS_Z-1:0]; + + initial z1 <= 0; + + always @(posedge clock_i or posedge s_reset) + if (s_reset) + z1 <= 0; + else begin + z1 <= (output_select_i == 3'b100) ? z0 : z2; + end + + // Output mux + assign z_o = (output_select_i == 3'h0) ? 
z0 : + (output_select_i == 3'h1) ? z2 : + (output_select_i == 3'h2) ? z2 : + (output_select_i == 3'h3) ? z2 : + (output_select_i == 3'h4) ? z1 : + (output_select_i == 3'h5) ? z1 : + (output_select_i == 3'h6) ? z1 : + z1; // if output_select_i == 3'h7 + + assign z_cout_o = z_o; + assign a_cout_o = r_a; + assign b_cout_o = r_b; + +endmodule \ No newline at end of file diff --git a/techlibs/quicklogic/synth_quicklogic.cc b/techlibs/quicklogic/synth_quicklogic.cc index 07ec769b5..8ac5eacb9 100644 --- a/techlibs/quicklogic/synth_quicklogic.cc +++ b/techlibs/quicklogic/synth_quicklogic.cc @@ -27,6 +27,12 @@ PRIVATE_NAMESPACE_BEGIN struct SynthQuickLogicPass : public ScriptPass { + enum DSPKind { + None, + V1, + V2, + }; + SynthQuickLogicPass() : ScriptPass("synth_quicklogic", "Synthesis for QuickLogic FPGAs") {} void help() override @@ -50,6 +56,10 @@ struct SynthQuickLogicPass : public ScriptPass { log(" do not use dsp_t1_* to implement multipliers and associated logic\n"); log(" (qlf_k6n10f only).\n"); log("\n"); + log(" -dspv2\n"); + log(" synthesize for the v2 DSP block model instead of v1\n"); + log(" (qlf_k6n10f only).\n"); + log("\n"); log(" -nocarry\n"); log(" do not use adder_carry cells in output netlist.\n"); log("\n"); @@ -78,7 +88,8 @@ struct SynthQuickLogicPass : public ScriptPass { } string top_opt, blif_file, edif_file, family, currmodule, verilog_file, lib_path; - bool abc9, inferAdder, nobram, bramTypes, dsp, ioff; + bool abc9, inferAdder, nobram, bramTypes, ioff; + DSPKind dsp; void clear_flags() override { @@ -93,7 +104,7 @@ struct SynthQuickLogicPass : public ScriptPass { nobram = false; bramTypes = false; lib_path = "+/quicklogic/"; - dsp = true; + dsp = V1; ioff = true; } @@ -156,7 +167,11 @@ struct SynthQuickLogicPass : public ScriptPass { continue; } if (args[argidx] == "-nodsp" || args[argidx] == "-no_dsp") { - dsp = false; + dsp = None; + continue; + } + if (args[argidx] == "-dspv2") { + dsp = V2; continue; } if (args[argidx] == "-noioff") { @@ 
-198,8 +213,11 @@ struct SynthQuickLogicPass : public ScriptPass { read_simlibs += stringf(" %sqlf_k6n10f/brams_sim.v", lib_path.c_str()); if (bramTypes) read_simlibs += stringf(" %sqlf_k6n10f/bram_types_sim.v", lib_path.c_str()); - if (dsp) - read_simlibs += stringf(" %sqlf_k6n10f/dsp_sim.v", lib_path.c_str()); + if (dsp == V1) { + read_simlibs += stringf(" %sqlf_k6n10f/dspv1_sim.v", lib_path.c_str()); + read_simlibs += stringf(" %sqlf_k6n10f/dspv1_sim_extra.v", lib_path.c_str()); + } else if (dsp == V2) + read_simlibs += stringf(" %sqlf_k6n10f/dspv2_sim.v", lib_path.c_str()); } run(read_simlibs); run(stringf("hierarchy -check %s", help_mode ? "-top " : top_opt.c_str())); @@ -225,18 +243,35 @@ struct SynthQuickLogicPass : public ScriptPass { } if (check_label("map_dsp", "(for qlf_k6n10f, skip if -nodsp)") - && ((dsp && family == "qlf_k6n10f") || help_mode)) { + && (((dsp != None) && family == "qlf_k6n10f") || help_mode)) { run("wreduce t:$mul"); - run("ql_dsp_macc"); - run("techmap -map +/mul2dsp.v -D DSP_A_MAXWIDTH=20 -D DSP_B_MAXWIDTH=18 -D DSP_A_MINWIDTH=11 -D DSP_B_MINWIDTH=10 -D DSP_NAME=$__QL_MUL20X18"); - run("techmap -map +/mul2dsp.v -D DSP_A_MAXWIDTH=10 -D DSP_B_MAXWIDTH=9 -D DSP_A_MINWIDTH=4 -D DSP_B_MINWIDTH=4 -D DSP_NAME=$__QL_MUL10X9"); - run("chtype -set $mul t:$__soft_mul"); + if (dsp == V1) { + run("ql_dsp_macc"); - run("techmap -map " + lib_path + family + "/dsp_map.v -D USE_DSP_CFG_PARAMS=0"); - run("ql_dsp_simd"); - run("techmap -map " + lib_path + family + "/dsp_final_map.v"); - run("ql_dsp_io_regs"); + run("techmap -map +/mul2dsp.v -D DSP_A_MAXWIDTH=20 -D DSP_B_MAXWIDTH=18 -D DSP_A_MINWIDTH=11 -D DSP_B_MINWIDTH=10 -D DSP_NAME=$__QL_MUL20X18"); + run("techmap -map +/mul2dsp.v -D DSP_A_MAXWIDTH=10 -D DSP_B_MAXWIDTH=9 -D DSP_A_MINWIDTH=4 -D DSP_B_MINWIDTH=4 -D DSP_NAME=$__QL_MUL10X9"); + run("chtype -set $mul t:$__soft_mul"); + + run("techmap -map " + lib_path + family + "/dspv1_map.v -D USE_DSP_CFG_PARAMS=0"); + run("ql_dsp_simd"); + 
run("techmap -map " + lib_path + family + "/dspv1_final_map.v"); + run("ql_dsp_io_regs"); + } else if (dsp == V2) { + run("ql_dsp_macc -dspv2"); + run("techmap -map +/mul2dsp.v -map " + lib_path + family + "/dspv2_map.v -D USE_DSP_CFG_PARAMS=0 -D DSP_SIGNEDONLY " + "-D DSP_A_MAXWIDTH=32 -D DSP_B_MAXWIDTH=18 -D DSP_A_MINWIDTH=10 -D DSP_B_MINWIDTH=10 -D DSP_NAME=$__MUL32X18"); + run("chtype -set $mul t:$__soft_mul"); + run("techmap -map +/mul2dsp.v -map " + lib_path + family + "/dspv2_map.v -D USE_DSP_CFG_PARAMS=0 -D DSP_SIGNEDONLY " + "-D DSP_A_MAXWIDTH=16 -D DSP_B_MAXWIDTH=9 -D DSP_A_MINWIDTH=4 -D DSP_B_MINWIDTH=4 -D DSP_NAME=$__MUL16X9"); + run("chtype -set $mul t:$__soft_mul"); + run("ql_dsp"); + run("ql_dsp_simd -dspv2"); + run("techmap -map " + lib_path + family + "/dspv2_final_map.v"); + run("ql_dsp_io_regs -dspv2"); + } else { + log_assert(false); + } } if (check_label("coarse")) { diff --git a/tests/arch/quicklogic/qlf_k6n10f/dsp.ys b/tests/arch/quicklogic/qlf_k6n10f/dspv1_full.ys similarity index 93% rename from tests/arch/quicklogic/qlf_k6n10f/dsp.ys rename to tests/arch/quicklogic/qlf_k6n10f/dspv1_full.ys index 1e652855b..6f61b434b 100644 --- a/tests/arch/quicklogic/qlf_k6n10f/dsp.ys +++ b/tests/arch/quicklogic/qlf_k6n10f/dspv1_full.ys @@ -21,7 +21,7 @@ EOF design -save ast proc wreduce -#equiv_opt -async2sync -map +/quicklogic/qlf_k6n10f/dsp_sim.v synth_quicklogic -family qlf_k6n10f +#equiv_opt -async2sync -map +/quicklogic/qlf_k6n10f/dspv1_sim.v synth_quicklogic -family qlf_k6n10f #design -load postopt synth_quicklogic -family qlf_k6n10f cd top @@ -114,8 +114,8 @@ always @(posedge clk) begin end endmodule EOF -read_verilog +/quicklogic/qlf_k6n10f/dsp_sim.v +read_verilog +/quicklogic/qlf_k6n10f/dspv1_sim.v hierarchy -top testbench proc async2sync -sim -assert -q -clock clk -n 20 +sim -q -clock clk -n 20 -assert diff --git a/tests/arch/quicklogic/qlf_k6n10f/dspv1_simd.ys b/tests/arch/quicklogic/qlf_k6n10f/dspv1_simd.ys new file mode 100644 index 
000000000..26f0ca16b --- /dev/null +++ b/tests/arch/quicklogic/qlf_k6n10f/dspv1_simd.ys @@ -0,0 +1,65 @@ +read_verilog <