From ff67ef6377d3331fa7424ddf88e5103e692a3f03 Mon Sep 17 00:00:00 2001 From: "Emil J. Tywoniak" Date: Tue, 24 Feb 2026 15:41:47 +0100 Subject: [PATCH] opt_clean: refactor --- passes/opt/opt_clean/cells_all.cc | 576 +++++++++++++++++------------- 1 file changed, 318 insertions(+), 258 deletions(-) diff --git a/passes/opt/opt_clean/cells_all.cc b/passes/opt/opt_clean/cells_all.cc index 353ada180..7f3de58ec 100644 --- a/passes/opt/opt_clean/cells_all.cc +++ b/passes/opt/opt_clean/cells_all.cc @@ -18,282 +18,342 @@ */ #include "kernel/ffinit.h" +#include "kernel/yosys_common.h" #include "passes/opt/opt_clean/opt_clean.h" -YOSYS_NAMESPACE_BEGIN +USING_YOSYS_NAMESPACE +PRIVATE_NAMESPACE_BEGIN unsigned int hash_bit(const SigBit &bit) { return static_cast(hash_ops::hash(bit).yield()); } +SigMap wire_sigmap(const RTLIL::Module* mod) { + SigMap map; + for (auto &it : mod->connections_) { + for (int i = 0; i < GetSize(it.second); i++) { + if (it.second[i].wire != nullptr) + map.add(it.first[i], it.second[i]); + } + } + return map; +} + +struct WireDrivers; +// Maps from a SigBit to a unique driver cell. +struct WireDriver { + using Accumulated = WireDrivers; + SigBit bit; + int driver_cell; +}; +// Maps from a SigBit to one or more driver cells. +struct WireDrivers { + WireDrivers() : driver_cell(0) {} + WireDrivers(WireDriver driver) : bit(driver.bit), driver_cell(driver.driver_cell) {} + WireDrivers(SigBit bit) : bit(bit), driver_cell(0) {} + WireDrivers(WireDrivers &&other) = default; + + class const_iterator { + public: + const_iterator(const WireDrivers &drivers, bool end) + : driver_cell(drivers.driver_cell), in_extra_cells(end) { + if (drivers.extra_driver_cells) { + if (end) { + extra_it = drivers.extra_driver_cells->end(); + } else { + extra_it = drivers.extra_driver_cells->begin(); + } + } + } + int operator*() const { + if (in_extra_cells) + return **extra_it; + return driver_cell; + } + const_iterator& operator++() { + if (in_extra_cells) + ++*extra_it; + else + in_extra_cells = true; + return *this; + } + bool operator!=(const const_iterator &other) const { + return !(*this == other); + } + bool operator==(const const_iterator &other) const { + return in_extra_cells == other.in_extra_cells && + extra_it == other.extra_it; + } + private: + std::optional::iterator> extra_it; + int driver_cell; + bool in_extra_cells; + }; + + const_iterator begin() const { return const_iterator(*this, false); } + const_iterator end() const { return const_iterator(*this, true); } + + SigBit bit; + int driver_cell; + std::unique_ptr> extra_driver_cells; +}; +struct WireDriversKeyEquality { + bool operator()(const WireDrivers &a, const WireDrivers &b) const { + return a.bit == b.bit; + } +}; +struct WireDriversCollisionHandler { + void operator()(WireDrivers &incumbent, WireDrivers &new_value) const { + log_assert(new_value.extra_driver_cells == nullptr); + if (!incumbent.extra_driver_cells) + incumbent.extra_driver_cells.reset(new pool()); + incumbent.extra_driver_cells->insert(new_value.driver_cell); + } +}; +using Wire2Drivers = ShardedHashtable; + +struct CellAnalysis { + Wire2Drivers wire2driver; + dict> mem2cells; + ShardedVector keep_wires; + std::vector> unused; + ConcurrentWorkQueue cell_queue; + ShardedVector> driver_driver_logs; + + CellAnalysis(const SigMap& wire_map, AnalysisContext& actx, CleanRunContext &clean_ctx) : mem2cells(), keep_wires(actx.subpool), unused(actx.mod->cells_size()), cell_queue(actx.subpool.num_threads()), driver_driver_logs(actx.subpool) { + Wire2Drivers::Builder wire2driver_builder(actx.subpool); + ShardedVector> mem2cells_vector(actx.subpool); + + // Enqueue kept cells into cell_queue + // Prepare input cone traversal from wire to driver cell as wire2driver + // Prepare "input cone" traversal from memory to write port or meminit as mem2cells + // Also check driver conflicts + // Also mark cells unused to true unless keep (we override this later) + actx.subpool.run([this, &wire_map, &mem2cells_vector, &wire2driver_builder, &actx, &clean_ctx](const ParallelDispatchThreadPool::RunCtx &ctx) { + for (int i : ctx.item_range(actx.mod->cells_size())) { + Cell *cell = actx.mod->cell_at(i); + if (cell->type.in(ID($memwr), ID($memwr_v2), ID($meminit), ID($meminit_v2))) + mem2cells_vector.insert(ctx, {cell->getParam(ID::MEMID).decode_string(), i}); + + for (auto &it2 : cell->connections()) { + if (clean_ctx.ct_all.cell_known(cell->type) && !clean_ctx.ct_all.cell_output(cell->type, it2.first)) + continue; + for (auto raw_bit : it2.second) { + if (raw_bit.wire == nullptr) + continue; + auto bit = actx.assign_map(raw_bit); + if (bit.wire == nullptr && clean_ctx.ct_all.cell_known(cell->type)) { + std::string msg = stringf("Driver-driver conflict " + "for %s between cell %s.%s and constant %s in %s: Resolved using constant.", + log_signal(raw_bit), cell->name.unescape(), it2.first.unescape(), log_signal(bit), actx.mod->name.unescape()); + driver_driver_logs.insert(ctx, {wire_map(raw_bit), msg}); + } + if (bit.wire != nullptr) + wire2driver_builder.insert(ctx, {{bit, i}, hash_bit(bit)}); + } + } + bool keep = clean_ctx.keep_cache.query(cell); + unused[i].store(!keep, std::memory_order_relaxed); + if (keep) + cell_queue.push(ctx, i); + } + for (int i : ctx.item_range(actx.mod->wires_size())) { + Wire *wire = actx.mod->wire_at(i); + if (wire->port_output || wire->get_bool_attribute(ID::keep)) + keep_wires.insert(ctx, wire); + } + }); + // Finish by merging per-thread collected data + actx.subpool.run([&wire2driver_builder](const ParallelDispatchThreadPool::RunCtx &ctx) { + wire2driver_builder.process(ctx); + }); + wire2driver = wire2driver_builder; + + for (std::pair &mem2cell : mem2cells_vector) + mem2cells[mem2cell.first].insert(mem2cell.second); + } + pool raw_wires_from_keep(const SigMap& sigmap, const SigMap& wire_map, int num_threads) { + // Also enqueue cells that drive kept wires into cell_queue + // and mark those cells as used + // and mark all bits of those wires as used + pool used_raw_bits; + int i = 0; + for (Wire *wire : keep_wires) { + for (auto bit : sigmap(wire)) { + const WireDrivers *drivers = wire2driver.find({{bit}, hash_bit(bit)}); + if (drivers != nullptr) + for (int cell_index : *drivers) + if (unused[cell_index].exchange(false, std::memory_order_relaxed)) { + ThreadIndex fake_thread_index = {i++ % num_threads}; + cell_queue.push(fake_thread_index, cell_index); + } + } + for (auto raw_bit : SigSpec(wire)) + used_raw_bits.insert(wire_map(raw_bit)); + } + return used_raw_bits; + } + void queue_cell_if_used(int cell_idx, const ParallelDispatchThreadPool::RunCtx &ctx) { + if (unused[cell_idx].exchange(false, std::memory_order_relaxed)) + cell_queue.push(ctx, cell_idx); + } + void print_warnings(pool& used_raw_bits, const SigMap& wire_map, const RTLIL::Module* mod, CleanRunContext &clean_ctx) { + if (!driver_driver_logs.empty()) { + // We could do this in parallel but hopefully this is rare. + for (auto [_, cell] : mod->cells_) { + for (auto &[port, sig] : cell->connections()) { + if (clean_ctx.ct_all.cell_known(cell->type) && !clean_ctx.ct_all.cell_input(cell->type, port)) + continue; + for (auto raw_bit : wire_map(sig)) + used_raw_bits.insert(raw_bit); + } + } + for (std::pair &it : driver_driver_logs) { + if (used_raw_bits.count(it.first)) + log_warning("%s\n", it.second); + } + } + } +}; + +struct MemAnalysis { + std::vector> unused; + dict indices; + MemAnalysis(RTLIL::Module* mod) : unused(mod->memories.size()), indices() { + for (int i = 0; i < GetSize(mod->memories); ++i) { + indices[mod->memories.element(i)->first.str()] = i; + unused[i].store(true, std::memory_order_relaxed); + } + } + + /** + * Functionally, analysis access is read-only + */ + void fixup_unused(CellAnalysis& analysis, AnalysisContext& actx, CleanRunContext &clean_ctx) { + // Processes the cell queue in batches, traversing input cones by enqueuing more cells + // Discover and mark used memories and cells + actx.subpool.run([this, &analysis, &actx, &clean_ctx](const ParallelDispatchThreadPool::RunCtx &ctx) { + pool bits; + pool mems; + while (true) { + std::vector cell_indices = analysis.cell_queue.pop_batch(ctx); + if (cell_indices.empty()) + return; + for (auto cell_index : cell_indices) { + Cell *cell = actx.mod->cell_at(cell_index); + for (auto &it : cell->connections()) + if (!clean_ctx.ct_all.cell_known(cell->type) || clean_ctx.ct_all.cell_input(cell->type, it.first)) + for (auto bit : actx.assign_map(it.second)) + bits.insert(bit); + + if (cell->type.in(ID($memrd), ID($memrd_v2))) { + std::string mem_id = cell->getParam(ID::MEMID).decode_string(); + if (indices.count(mem_id)) { + int mem_index = indices[mem_id]; + // This is the actual fixup, everything else is just traversal + if (unused[mem_index].exchange(false, std::memory_order_relaxed)) + mems.insert(mem_id); + } + } + } + + for (auto bit : bits) { + const WireDrivers *drivers = analysis.wire2driver.find({{bit}, hash_bit(bit)}); + if (drivers != nullptr) + for (int cell_idx : *drivers) + analysis.queue_cell_if_used(cell_idx, ctx); + } + bits.clear(); + + for (auto mem : mems) { + if (analysis.mem2cells.count(mem) == 0) + continue; + for (int cell_idx : analysis.mem2cells.at(mem)) + analysis.queue_cell_if_used(cell_idx, ctx); + } + mems.clear(); + } + }); + } +}; + +/** + * Functionally, analysis access is read-only + */ +pool all_unused_cells(const Module *mod, CellAnalysis& analysis, ParallelDispatchThreadPool::Subpool &subpool) { + pool unused_cells; + { + ShardedVector sharded_unused_cells(subpool); + subpool.run([mod, &analysis, &sharded_unused_cells](const ParallelDispatchThreadPool::RunCtx &ctx) { + // Parallel destruction of `wire2driver` + analysis.wire2driver.clear(ctx); + for (int i : ctx.item_range(mod->cells_size())) + if (analysis.unused[i].load(std::memory_order_relaxed)) + sharded_unused_cells.insert(ctx, i); + }); + for (int cell_index : sharded_unused_cells) + unused_cells.insert(mod->cell_at(cell_index)); + unused_cells.sort(RTLIL::sort_by_name_id()); + } + return unused_cells; +} + +void remove_cells(RTLIL::Module* mod, FfInitVals& ffinit, const pool& cells, bool verbose, RmStats& stats) { + for (auto cell : cells) { + if (verbose) + log_debug(" removing unused `%s' cell `%s'.\n", cell->type, cell->name); + mod->design->scratchpad_set_bool("opt.did_something", true); + if (cell->is_builtin_ff()) + ffinit.remove_init(cell->getPort(ID::Q)); + mod->remove(cell); + stats.count_rm_cells++; + } +} + +void remove_mems(RTLIL::Module* mod, const MemAnalysis& mem_analysis, bool verbose) { + for (const auto &it : mem_analysis.indices) { + if (!mem_analysis.unused[it.second].load(std::memory_order_relaxed)) + continue; + RTLIL::IdString id(it.first); + if (verbose) + log_debug(" removing unused memory `%s'.\n", id.unescape()); + delete mod->memories.at(id); + mod->memories.erase(id); + } +} + +PRIVATE_NAMESPACE_END + +YOSYS_NAMESPACE_BEGIN + void rmunused_module_cells(Module *module, ParallelDispatchThreadPool::Subpool &subpool, CleanRunContext &clean_ctx) { + AnalysisContext actx(module, subpool); SigMap sigmap(module); FfInitVals ffinit; ffinit.set_parallel(&sigmap, subpool.thread_pool(), module); - SigMap raw_sigmap; - for (auto &it : module->connections_) { - for (int i = 0; i < GetSize(it.second); i++) { - if (it.second[i].wire != nullptr) - raw_sigmap.add(it.first[i], it.second[i]); - } - } + // Formerly known as raw_sigmap + // TODO What exactly makes it "raw"? No constants on the rhs? + // Otherwise, "raw" is used to mean "not sigmapped" + SigMap wire_map = wire_sigmap(module); - struct WireDrivers; - // Maps from a SigBit to a unique driver cell. - struct WireDriver { - using Accumulated = WireDrivers; - SigBit bit; - int driver_cell; - }; - // Maps from a SigBit to one or more driver cells. - struct WireDrivers { - WireDrivers() : driver_cell(0) {} - WireDrivers(WireDriver driver) : bit(driver.bit), driver_cell(driver.driver_cell) {} - WireDrivers(SigBit bit) : bit(bit), driver_cell(0) {} - WireDrivers(WireDrivers &&other) = default; + CellAnalysis analysis(wire_map, actx, clean_ctx); + pool used_raw_bits = analysis.raw_wires_from_keep(sigmap, wire_map, subpool.num_threads()); - class const_iterator { - public: - const_iterator(const WireDrivers &drivers, bool end) - : driver_cell(drivers.driver_cell), in_extra_cells(end) { - if (drivers.extra_driver_cells) { - if (end) { - extra_it = drivers.extra_driver_cells->end(); - } else { - extra_it = drivers.extra_driver_cells->begin(); - } - } - } - int operator*() const { - if (in_extra_cells) - return **extra_it; - return driver_cell; - } - const_iterator& operator++() { - if (in_extra_cells) - ++*extra_it; - else - in_extra_cells = true; - return *this; - } - bool operator!=(const const_iterator &other) const { - return !(*this == other); - } - bool operator==(const const_iterator &other) const { - return in_extra_cells == other.in_extra_cells && - extra_it == other.extra_it; - } - private: - std::optional::iterator> extra_it; - int driver_cell; - bool in_extra_cells; - }; - - const_iterator begin() const { return const_iterator(*this, false); } - const_iterator end() const { return const_iterator(*this, true); } - - SigBit bit; - int driver_cell; - std::unique_ptr> extra_driver_cells; - }; - struct WireDriversKeyEquality { - bool operator()(const WireDrivers &a, const WireDrivers &b) const { - return a.bit == b.bit; - } - }; - struct WireDriversCollisionHandler { - void operator()(WireDrivers &incumbent, WireDrivers &new_value) const { - log_assert(new_value.extra_driver_cells == nullptr); - if (!incumbent.extra_driver_cells) - incumbent.extra_driver_cells.reset(new pool()); - incumbent.extra_driver_cells->insert(new_value.driver_cell); - } - }; - using Wire2Drivers = ShardedHashtable; - - Wire2Drivers::Builder wire2driver_builder(subpool); - ShardedVector> mem2cells_vector(subpool); - ShardedVector> driver_driver_logs(subpool); - ShardedVector keep_wires(subpool); - const RTLIL::Module *const_module = module; - int num_threads = subpool.num_threads(); - ConcurrentWorkQueue cell_queue(num_threads); - std::vector> unused(const_module->cells_size()); - - // Enqueue kept cells into cell_queue - // Prepare input cone traversal from wire to driver cell as wire2driver - // Prepare "input cone" traversal from memory to write port or meminit as mem2cells - // Also check driver conflicts - // Also mark cells unused to true unless keep (we override this later) - subpool.run([&sigmap, &raw_sigmap, const_module, &mem2cells_vector, &driver_driver_logs, &keep_wires, &cell_queue, &wire2driver_builder, &clean_ctx, &unused](const ParallelDispatchThreadPool::RunCtx &ctx) { - for (int i : ctx.item_range(const_module->cells_size())) { - Cell *cell = const_module->cell_at(i); - if (cell->type.in(ID($memwr), ID($memwr_v2), ID($meminit), ID($meminit_v2))) - mem2cells_vector.insert(ctx, {cell->getParam(ID::MEMID).decode_string(), i}); - - for (auto &it2 : cell->connections()) { - if (clean_ctx.ct_all.cell_known(cell->type) && !clean_ctx.ct_all.cell_output(cell->type, it2.first)) - continue; - for (auto raw_bit : it2.second) { - if (raw_bit.wire == nullptr) - continue; - auto bit = sigmap(raw_bit); - if (bit.wire == nullptr && clean_ctx.ct_all.cell_known(cell->type)) { - std::string msg = stringf("Driver-driver conflict " - "for %s between cell %s.%s and constant %s in %s: Resolved using constant.", - log_signal(raw_bit), cell->name.unescape(), it2.first.unescape(), log_signal(bit), const_module->name.unescape()); - driver_driver_logs.insert(ctx, {raw_sigmap(raw_bit), msg}); - } - if (bit.wire != nullptr) - wire2driver_builder.insert(ctx, {{bit, i}, hash_bit(bit)}); - } - } - bool keep = clean_ctx.keep_cache.query(cell); - unused[i].store(!keep, std::memory_order_relaxed); - if (keep) - cell_queue.push(ctx, i); - } - for (int i : ctx.item_range(const_module->wires_size())) { - Wire *wire = const_module->wire_at(i); - if (wire->port_output || wire->get_bool_attribute(ID::keep)) - keep_wires.insert(ctx, wire); - } - }); - // Finish by merging per-thread collected data - subpool.run([&wire2driver_builder](const ParallelDispatchThreadPool::RunCtx &ctx) { - wire2driver_builder.process(ctx); - }); - Wire2Drivers wire2driver(wire2driver_builder); - dict> mem2cells; - for (std::pair &mem2cell : mem2cells_vector) - mem2cells[mem2cell.first].insert(mem2cell.second); - - // Also enqueue cells that drive kept wires into cell_queue - // and mark those cells as used - // and mark all bits of those wires as used - pool used_raw_bits; - int i = 0; - for (Wire *wire : keep_wires) { - for (auto bit : sigmap(wire)) { - const WireDrivers *drivers = wire2driver.find({{bit}, hash_bit(bit)}); - if (drivers != nullptr) - for (int cell_index : *drivers) - if (unused[cell_index].exchange(false, std::memory_order_relaxed)) { - ThreadIndex fake_thread_index = {i++ % num_threads}; - cell_queue.push(fake_thread_index, cell_index); - } - } - for (auto raw_bit : SigSpec(wire)) - used_raw_bits.insert(raw_sigmap(raw_bit)); - } - - // Mark all memories as unused (we override this later) - std::vector> mem_unused(module->memories.size()); - dict mem_indices; - for (int i = 0; i < GetSize(module->memories); ++i) { - mem_indices[module->memories.element(i)->first.str()] = i; - mem_unused[i].store(true, std::memory_order_relaxed); - } - - // Discover and mark used memories and cells - // Processes the cell queue in batches, traversing input cones by enqueuing more cells - subpool.run([const_module, &sigmap, &wire2driver, &mem2cells, &unused, &cell_queue, &mem_indices, &mem_unused, &clean_ctx](const ParallelDispatchThreadPool::RunCtx &ctx) { - pool bits; - pool mems; - while (true) { - std::vector cell_indices = cell_queue.pop_batch(ctx); - if (cell_indices.empty()) - return; - for (auto cell_index : cell_indices) { - Cell *cell = const_module->cell_at(cell_index); - for (auto &it : cell->connections()) - if (!clean_ctx.ct_all.cell_known(cell->type) || clean_ctx.ct_all.cell_input(cell->type, it.first)) - for (auto bit : sigmap(it.second)) - bits.insert(bit); - - if (cell->type.in(ID($memrd), ID($memrd_v2))) { - std::string mem_id = cell->getParam(ID::MEMID).decode_string(); - if (mem_indices.count(mem_id)) { - int mem_index = mem_indices[mem_id]; - if (mem_unused[mem_index].exchange(false, std::memory_order_relaxed)) - mems.insert(mem_id); - } - } - } - - for (auto bit : bits) { - const WireDrivers *drivers = wire2driver.find({{bit}, hash_bit(bit)}); - if (drivers != nullptr) - for (int cell_index : *drivers) - if (unused[cell_index].exchange(false, std::memory_order_relaxed)) - cell_queue.push(ctx, cell_index); - } - bits.clear(); - - for (auto mem : mems) { - if (mem2cells.count(mem) == 0) - continue; - for (int cell_index : mem2cells.at(mem)) - if (unused[cell_index].exchange(false, std::memory_order_relaxed)) - cell_queue.push(ctx, cell_index); - } - mems.clear(); - } - }); + // Mark all memories as unused initially + MemAnalysis mem_analysis(module); + // then fix that by traversing design with analysis.cell_queue + mem_analysis.fixup_unused(analysis, actx, clean_ctx); + // mem_analysis is now correct + // analysis and mem_analysis now are functionally finalized and read-only // Set of all unused cells, built in parallel from unused by filtering for unused[i]==true - pool unused_cells; - { - ShardedVector sharded_unused_cells(subpool); - subpool.run([const_module, &unused, &sharded_unused_cells, &wire2driver](const ParallelDispatchThreadPool::RunCtx &ctx) { - // Parallel destruction of `wire2driver` - wire2driver.clear(ctx); - for (int i : ctx.item_range(const_module->cells_size())) - if (unused[i].load(std::memory_order_relaxed)) - sharded_unused_cells.insert(ctx, i); - }); - for (int cell_index : sharded_unused_cells) - unused_cells.insert(const_module->cell_at(cell_index)); - unused_cells.sort(RTLIL::sort_by_name_id()); - } + pool unused_cells = all_unused_cells(module, analysis, subpool); - for (auto cell : unused_cells) { - if (clean_ctx.flags.verbose) - log_debug(" removing unused `%s' cell `%s'.\n", cell->type, cell->name); - module->design->scratchpad_set_bool("opt.did_something", true); - if (cell->is_builtin_ff()) - ffinit.remove_init(cell->getPort(ID::Q)); - module->remove(cell); - clean_ctx.stats.count_rm_cells++; - } - - for (const auto &it : mem_indices) { - if (!mem_unused[it.second].load(std::memory_order_relaxed)) - continue; - RTLIL::IdString id(it.first); - if (clean_ctx.flags.verbose) - log_debug(" removing unused memory `%s'.\n", id.unescape()); - delete module->memories.at(id); - module->memories.erase(id); - } - - if (!driver_driver_logs.empty()) { - // We could do this in parallel but hopefully this is rare. - for (auto [_, cell] : module->cells_) { - for (auto &[port, sig] : cell->connections()) { - if (clean_ctx.ct_all.cell_known(cell->type) && !clean_ctx.ct_all.cell_input(cell->type, port)) - continue; - for (auto raw_bit : raw_sigmap(sig)) - used_raw_bits.insert(raw_bit); - } - } - for (std::pair &it : driver_driver_logs) { - if (used_raw_bits.count(it.first)) - log_warning("%s\n", it.second); - } - } + remove_cells(module, ffinit, unused_cells, clean_ctx.flags.verbose, clean_ctx.stats); + remove_mems(module, mem_analysis, clean_ctx.flags.verbose); + analysis.print_warnings(used_raw_bits, wire_map, module, clean_ctx); } YOSYS_NAMESPACE_END