From 00b3c832828ca4ae2091b63d5ccc54579cc5a391 Mon Sep 17 00:00:00 2001
From: Craig Hasselbring
Date: Wed, 19 Nov 2025 18:31:38 -0800
Subject: [PATCH] memory_libmap: Add beam search for many-port memories
MIME-Version: 1.0
Content-Type: text/plain; charset=UTF-8
Content-Transfer-Encoding: 8bit

The existing read port assignment algorithm uses Cartesian product
expansion, which has O(options^N) complexity. For memories with many
read ports (e.g., 64 parallel reads), this causes exponential memory
usage (60GB+) and timeouts.

This commit adds a beam search algorithm that activates for >8 read
ports. It maintains only the top K (default 16) configurations at each
step, reducing complexity to O(N * options * K).

Tested with:
- 64 read ports: completes in ~1s vs OOM
- 32 read ports: completes in ~1s vs timeout
- Maintains existing behavior for ≤8 ports

🤖 Generated with [Claude Code](https://claude.com/claude-code)

Co-Authored-By: Claude
---
 passes/memory/memory_libmap.cc     | 160 ++++++++++++++++++++++++++++-
 tests/memlib/memlib_beam_search.v  |  27 +++++
 tests/memlib/memlib_beam_search.ys |   4 +
 3 files changed, 189 insertions(+), 2 deletions(-)
 create mode 100644 tests/memlib/memlib_beam_search.v
 create mode 100644 tests/memlib/memlib_beam_search.ys

diff --git a/passes/memory/memory_libmap.cc b/passes/memory/memory_libmap.cc
index c3c10363b..85a6577f6 100644
--- a/passes/memory/memory_libmap.cc
+++ b/passes/memory/memory_libmap.cc
@@ -31,6 +31,10 @@ PRIVATE_NAMESPACE_BEGIN
 
 using namespace MemLibrary;
 
+// Beam search parameters for many-port memories
+#define BEAM_SEARCH_THRESHOLD 8  // Use beam search for >8 read ports
+#define BEAM_WIDTH 16            // Keep top 16 configurations at each step
+
 #define FACTOR_MUX 0.5
 #define FACTOR_DEMUX 0.5
 #define FACTOR_EMU 2
@@ -304,6 +308,9 @@ struct MemMapping {
 	bool check_init(const Ram &ram);
 	void assign_wr_ports();
 	void assign_rd_ports();
+	void assign_rd_ports_standard();
+	void assign_rd_ports_beam(int beam_width);
+	double estimate_config_cost(const MemConfig &cfg);
 	void handle_trans();
 	void handle_priority();
 	void handle_rd_rst();
@@ -737,9 +744,158 @@ void MemMapping::assign_wr_ports() {
 	}
 }
 
-// Perform read port assignment, validating clock and rden options as we go.
+// Estimate cost of a configuration for beam search pruning: cfg.def->cost scaled by the port replication factor, or 1e30 for an invalid config
+double MemMapping::estimate_config_cost(const MemConfig &cfg) {
+	double cost = cfg.def->cost;
+
+	// Track port group usage separately for write and non-shared read ports
+	std::map<int, int> port_usage_wr;
+	std::map<int, int> port_usage_rd;
+
+	for (const auto &pcfg : cfg.wr_ports) {
+		port_usage_wr[pcfg.port_group]++;
+	}
+	for (const auto &pcfg : cfg.rd_ports) {
+		// Only non-shared read ports need their own slot
+		if (pcfg.wr_port == -1) {
+			port_usage_rd[pcfg.port_group]++;
+		}
+	}
+
+	// Calculate replication factor based on port constraints
+	int repl_port = 1;
+	for (int i = 0; i < GetSize(cfg.def->port_groups); i++) {
+		int capacity = GetSize(cfg.def->port_groups[i].names);
+		int space = capacity - port_usage_wr[i];
+
+		// Invalid: write ports exceed capacity
+		if (space < 0) {
+			return 1e30; // Very high cost for invalid config
+		}
+
+		// Calculate replication needed for this group
+		if (port_usage_rd[i] > 0) {
+			if (space == 0) {
+				// No space for standalone reads - invalid config
+				return 1e30;
+			}
+			int cur = (port_usage_rd[i] + space - 1) / space;
+			if (cur > repl_port)
+				repl_port = cur;
+		}
+	}
+
+	cost *= repl_port;
+	return cost;
+}
+
+// Beam search read port assignment (for many ports): extends every config by one read port per step; if more than beam_width candidates result, keeps only the beam_width cheapest valid ones
+void MemMapping::assign_rd_ports_beam(int beam_width) {
+	log("Using beam search (width %d) for %d read ports on memory %s.%s\n",
+		beam_width, GetSize(mem.rd_ports),
+		log_id(mem.module->name), log_id(mem.memid));
+
+	for (int pidx = 0; pidx < GetSize(mem.rd_ports); pidx++) {
+		auto &port = mem.rd_ports[pidx];
+		MemConfigs new_cfgs;
+
+		for (auto &cfg : cfgs) {
+			for (int pgi = 0; pgi < GetSize(cfg.def->port_groups); pgi++) {
+				auto &pg = cfg.def->port_groups[pgi];
+				for (int pvi = 0; pvi < GetSize(pg.variants); pvi++) {
+					auto &def = pg.variants[pvi];
+					if (def.kind == PortKind::Sw)
+						continue;
+					if (!port.clk_enable &&
+						(def.kind == PortKind::Sr || def.kind == PortKind::Srsw))
+						continue;
+
+					MemConfig new_cfg = cfg;
+					RdPortConfig pcfg;
+					pcfg.wr_port = -1;
+					pcfg.port_group = pgi;
+					pcfg.port_variant = pvi;
+					pcfg.def = &def;
+
+					if (def.kind == PortKind::Sr || def.kind == PortKind::Srsw) {
+						pcfg.emu_sync = false;
+						if (!apply_clock(new_cfg, def, port.clk, port.clk_polarity))
+							continue;
+						if (port.en != State::S1) {
+							if (def.clk_en)
+								pcfg.rd_en_to_clk_en = true;
+							else
+								pcfg.emu_en = !def.rd_en;
+						}
+					} else {
+						pcfg.emu_sync = port.clk_enable;
+					}
+
+					new_cfg.rd_ports.push_back(pcfg);
+					new_cfgs.push_back(new_cfg);
+				}
+			}
+
+			for (int wpidx = 0; wpidx < GetSize(mem.wr_ports); wpidx++) {
+				auto &wpcfg = cfg.wr_ports[wpidx];
+				if (wpcfg.rd_port != -1)
+					continue;
+				if (wpcfg.def->kind == PortKind::Sw)
+					continue;
+				if (!addr_compatible(wpidx, pidx))
+					continue;
+
+				MemConfig new_cfg = cfg;
+				new_cfg.wr_ports[wpidx].rd_port = pidx;
+				RdPortConfig pcfg;
+				pcfg.wr_port = wpidx;
+				pcfg.port_group = wpcfg.port_group;
+				pcfg.port_variant = wpcfg.port_variant;
+				pcfg.def = wpcfg.def;
+				pcfg.emu_sync = port.clk_enable && wpcfg.def->kind == PortKind::Arsw;
+				new_cfg.rd_ports.push_back(pcfg);
+				new_cfgs.push_back(new_cfg);
+			}
+		}
+
+		if (GetSize(new_cfgs) > beam_width) {
+			std::vector<std::pair<double, int>> scored;
+			for (int i = 0; i < GetSize(new_cfgs); i++)
+				scored.push_back({estimate_config_cost(new_cfgs[i]), i});
+			std::sort(scored.begin(), scored.end());
+
+			MemConfigs pruned;
+			for (int i = 0; i < GetSize(scored); i++) {
+				// Skip invalid configs (cost >= 1e29)
+				if (scored[i].first >= 1e29)
+					continue;
+				pruned.push_back(new_cfgs[scored[i].second]);
+				// Stop once we have enough valid configs
+				if (GetSize(pruned) >= beam_width)
+					break;
+			}
+			// Use pruned configs (may be empty if all invalid - triggers LUT fallback)
+			new_cfgs = pruned;
+		}
+
+		cfgs = new_cfgs;
+	}
+}
+
+// Main read port assignment - chooses algorithm based on port count
 void MemMapping::assign_rd_ports() {
-	log_reject(stringf("Assigning read ports... (candidate configs: %zu)", (size_t) cfgs.size()));
+	int num_rd_ports = GetSize(mem.rd_ports);
+
+	if (num_rd_ports > BEAM_SEARCH_THRESHOLD) {
+		assign_rd_ports_beam(BEAM_WIDTH);
+	} else {
+		assign_rd_ports_standard();
+	}
+}
+
+// Perform read port assignment using standard Cartesian product (for few ports)
+void MemMapping::assign_rd_ports_standard() {
+	log_reject(stringf("Assigning read ports (standard)... (candidate configs: %zu)", (size_t) cfgs.size()));
 	for (int pidx = 0; pidx < GetSize(mem.rd_ports); pidx++) {
 		auto &port = mem.rd_ports[pidx];
 		MemConfigs new_cfgs;
diff --git a/tests/memlib/memlib_beam_search.v b/tests/memlib/memlib_beam_search.v
new file mode 100644
index 000000000..9161b5ad6
--- /dev/null
+++ b/tests/memlib/memlib_beam_search.v
@@ -0,0 +1,27 @@
+// Test case for beam search optimization in memory_libmap
+// This memory with 32 parallel read ports would cause exponential
+// blowup (O(4^32) = 10^19 configurations) without beam search pruning
+
+module memlib_beam_search (
+    input wire clk,
+    input wire we,
+    input wire [9:0] wr_addr,
+    input wire [7:0] wr_data,
+    input wire [9:0] base_addr,
+    output reg [255:0] parallel_out  // 32 x 8 = 256 bits
+);
+
+    reg [7:0] mem [0:1023];
+    integer i;
+
+    always @(posedge clk) begin
+        if (we)
+            mem[wr_addr] <= wr_data;
+
+        // 32 parallel reads - triggers beam search
+        for (i = 0; i < 32; i = i + 1) begin
+            parallel_out[i*8 +: 8] <= mem[base_addr + i];
+        end
+    end
+
+endmodule
diff --git a/tests/memlib/memlib_beam_search.ys b/tests/memlib/memlib_beam_search.ys
new file mode 100644
index 000000000..f07d5f3f3
--- /dev/null
+++ b/tests/memlib/memlib_beam_search.ys
@@ -0,0 +1,4 @@
+read_verilog memlib_beam_search.v
+hierarchy -top memlib_beam_search
+synth_xilinx -family xc7 -top memlib_beam_search
+stat