write_cxxrtl: improve writable memory handling.

This commit reduces both the space overhead and the time overhead of writable memories to O(write port count); implements handling for write port priorities; and simplifies the runtime representation of memories.
2025-12-18 02:08:33 +00:00 · 2020-04-05 02:06:26 +00:00 · 2020-04-05 02:06:26 +00:00 · 01e6850bd3
commit 01e6850bd3
parent fb0270b752
2 changed files with 88 additions and 66 deletions
--- a/backends/cxxrtl/cxxrtl.h
+++ b/backends/cxxrtl/cxxrtl.h
@ -1,7 +1,7 @@
 /*
 *  yosys -- Yosys Open SYnthesis Suite
 *
- *  Copyright (C) 2019  whitequark <whitequark@whitequark.org>
+ *  Copyright (C) 2019-2020  whitequark <whitequark@whitequark.org>
 *
 *  Permission to use, copy, modify, and/or distribute this software for any
 *  purpose with or without fee is hereby granted.
@ -28,6 +28,7 @@
 #include <type_traits>
 #include <tuple>
 #include <vector>
+#include <algorithm>
 #include <sstream>

 // The cxxrtl support library implements compile time specialized arbitrary width arithmetics, as well as provides
@ -73,9 +74,6 @@ struct value : public expr_base<value<Bits>> {
 	template<typename... Init>
 	explicit constexpr value(Init ...init) : data{init...} {}

-	// This allows using value<> as well as wire<> in memory initializers.
-	using init = value<Bits>;
-
 	value(const value<Bits> &) = default;
 	value(value<Bits> &&) = default;
 	value<Bits> &operator=(const value<Bits> &) = default;
@ -297,7 +295,7 @@ struct value : public expr_base<value<Bits>> {
 		return result;
 	}

-	value<Bits> update(const value<Bits> &mask, const value<Bits> &val) const {
+	value<Bits> update(const value<Bits> &val, const value<Bits> &mask) const {
 		return bit_and(mask.bit_not()).bit_or(val.bit_and(mask));
 	}

@ -559,19 +557,6 @@ struct wire {
 	wire(wire<Bits> &&) = default;
 	wire<Bits> &operator=(const wire<Bits> &) = delete;

-	// We want to avoid having operator=(wire<>) or operator=(value<>) that overwrites both curr and next,
-	// since this operation is almost always wrong. But we also need an operation like that for memory
-	// initialization. This is solved by adding a wrapper and making the use of operator= valid only when
-	// this wrapper is used.
-	struct init {
-		value<Bits> data;
-	};
-
-	wire<Bits> &operator=(const init &init) {
-		curr = next = init.data;
-		return *this;
-	}
-
 	bool commit() {
 		if (curr != next) {
 			curr = next;
@ -587,12 +572,10 @@ std::ostream &operator<<(std::ostream &os, const wire<Bits> &val) {
 	return os;
 }

-template<class Elem>
+template<size_t Width>
 struct memory {
-	using StoredElem = typename std::remove_const<Elem>::type;
-	std::vector<StoredElem> data;
+	std::vector<value<Width>> data;

-	static constexpr size_t width = StoredElem::bits;
 	size_t depth() const {
 		return data.size();
 	}
@ -600,8 +583,8 @@ struct memory {
 	memory() = delete;
 	explicit memory(size_t depth) : data(depth) {}

-	memory(const memory<Elem> &) = delete;
-	memory<Elem> &operator=(const memory<Elem> &) = delete;
+	memory(const memory<Width> &) = delete;
+	memory<Width> &operator=(const memory<Width> &) = delete;

 	// The only way to get the compiler to put the initializer in .rodata and do not copy it on stack is to stuff it
 	// into a plain array. You'd think an std::initializer_list would work here, but it doesn't, because you can't
@ -610,7 +593,7 @@ struct memory {
 	template<size_t Size>
 	struct init {
 		size_t offset;
-		typename Elem::init data[Size];
+		value<Width> data[Size];
 	};

 	template<size_t... InitSize>
@ -621,18 +604,56 @@ struct memory {
 		auto _ = {std::move(std::begin(init.data), std::end(init.data), data.begin() + init.offset)...};
 	}

-	Elem &operator [](size_t index) {
+	value<Width> &operator [](size_t index) {
 		assert(index < data.size());
 		return data[index];
 	}
+
+	const value<Width> &operator [](size_t index) const {
+		assert(index < data.size());
+		return data[index];
+	}
+
+	// A simple way to make a writable memory would be to use an array of wires instead of an array of values.
+	// However, there are two significant downsides to this approach: first, it has large overhead (2× space
+	// overhead, and O(depth) time overhead during commit); second, it does not simplify handling write port
+	// priorities. Although in principle write ports could be ordered or conditionally enabled in generated
+	// code based on their priorities and selected addresses, the feedback arc set problem is computationally
+	// expensive, and the heuristic based algorithms are not easily modified to guarantee (rather than prefer)
+	// a particular write port evaluation order.
+	//
+	// The approach used here instead is to queue writes into a buffer during the eval phase, then perform
+	// the writes during the commit phase in the priority order. This approach has low overhead, with both space
+	// and time proportional to the number of write ports. Because virtually every memory in a practical design
+	// has at most two write ports, the queue is simply appended to on every write and sorted once per commit.
+	struct write {
+		size_t index;
+		value<Width> val;
+		value<Width> mask;
+		int priority;
+	};
+	std::vector<write> write_queue;
+
+	void update(size_t index, const value<Width> &val, const value<Width> &mask, int priority = 0) {
+		assert(index < data.size());
+		write_queue.emplace_back(write { index, val, mask, priority });
+	}
+
+	bool commit() {
+		bool changed = false;
+		std::sort(write_queue.begin(), write_queue.end(),
+			[](const write &a, const write &b) { return a.priority < b.priority; });
+		for (const write &entry : write_queue) {
+			value<Width> elem = data[entry.index];
+			elem = elem.update(entry.val, entry.mask);
+			changed |= (data[entry.index] != elem);
+			data[entry.index] = elem;
+		}
+		write_queue.clear();
+		return changed;
+	}
 };

-template<size_t Width>
-using memory_rw = memory<wire<Width>>;
-
-template<size_t Width>
-using memory_ro = memory<const value<Width>>;
-
 struct module {
 	module() {}
 	virtual ~module() {}
@ -1098,15 +1119,19 @@ value<BitsY> mod_ss(const value<BitsA> &a, const value<BitsB> &b) {
 }

 // Memory helper
-template<size_t BitsAddr>
-std::pair<bool, size_t> memory_index(const value<BitsAddr> &addr, size_t offset, size_t depth) {
-	static_assert(value<BitsAddr>::chunks <= 1, "memory address is too wide");
-	size_t offset_index = addr.data[0];
+struct memory_index {
+	bool valid;
+	size_t index;

-	bool valid = (offset_index >= offset && offset_index < offset + depth);
-	size_t index = offset_index - offset;
-	return std::make_pair(valid, index);
-}
+	template<size_t BitsAddr>
+	memory_index(const value<BitsAddr> &addr, size_t offset, size_t depth) {
+		static_assert(value<BitsAddr>::chunks <= 1, "memory address is too wide");
+		size_t offset_index = addr.data[0];
+
+		valid = (offset_index >= offset && offset_index < offset + depth);
+		index = offset_index - offset;
+	}
+};

 } // namespace cxxrtl_yosys