diff --git a/backends/cxxrtl/cxxrtl.cc b/backends/cxxrtl/cxxrtl.cc
index ef8335e50..237700b29 100644
--- a/backends/cxxrtl/cxxrtl.cc
+++ b/backends/cxxrtl/cxxrtl.cc
@@ -171,6 +171,11 @@ struct Scheduler {
 	}
 };
 
+bool is_input_wire(const RTLIL::Wire *wire)
+{
+	return wire->port_input && !wire->port_output;
+}
+
 bool is_unary_cell(RTLIL::IdString type)
 {
 	return type.in(
@@ -210,11 +215,54 @@ bool is_internal_cell(RTLIL::IdString type)
 	return type[0] == '$' && !type.begins_with("$paramod\\");
 }
 
+bool is_cxxrtl_blackbox_cell(const RTLIL::Cell *cell)
+{
+	RTLIL::Module *cell_module = cell->module->design->module(cell->type);
+	log_assert(cell_module != nullptr);
+	return cell_module->get_bool_attribute(ID(cxxrtl.blackbox));
+}
+
+enum class CxxrtlPortType {
+	UNKNOWN = 0, // or mixed comb/sync
+	COMB = 1,
+	SYNC = 2,
+};
+
+CxxrtlPortType cxxrtl_port_type(const RTLIL::Cell *cell, RTLIL::IdString port)
+{
+	RTLIL::Module *cell_module = cell->module->design->module(cell->type);
+	if (cell_module == nullptr || !cell_module->get_bool_attribute(ID(cxxrtl.blackbox)))
+		return CxxrtlPortType::UNKNOWN;
+	RTLIL::Wire *cell_output_wire = cell_module->wire(port);
+	log_assert(cell_output_wire != nullptr);
+	bool is_comb = cell_output_wire->get_bool_attribute(ID(cxxrtl.comb));
+	bool is_sync = cell_output_wire->get_bool_attribute(ID(cxxrtl.sync));
+	if (is_comb && is_sync)
+		log_cmd_error("Port `%s.%s' is marked as both `cxxrtl.comb` and `cxxrtl.sync`.\n",
+		              log_id(cell_module), log_signal(cell_output_wire));
+	else if (is_comb)
+		return CxxrtlPortType::COMB;
+	else if (is_sync)
+		return CxxrtlPortType::SYNC;
+	return CxxrtlPortType::UNKNOWN;
+}
+
+bool is_cxxrtl_comb_port(const RTLIL::Cell *cell, RTLIL::IdString port)
+{
+	return cxxrtl_port_type(cell, port) == CxxrtlPortType::COMB;
+}
+
+bool is_cxxrtl_sync_port(const RTLIL::Cell *cell, RTLIL::IdString port)
+{
+	return cxxrtl_port_type(cell, port) == CxxrtlPortType::SYNC;
+}
+
 struct FlowGraph {
 	struct Node {
 		enum class Type {
 			CONNECT,
-			CELL,
+			CELL_SYNC,
+			CELL_EVAL,
 			PROCESS
 		};
 
@@ -225,7 +273,7 @@ struct FlowGraph {
 	};
 
 	std::vector<Node*> nodes;
-	dict<const RTLIL::Wire*, pool<Node*, hash_ptr_ops>> wire_defs, wire_uses;
+	dict<const RTLIL::Wire*, pool<Node*, hash_ptr_ops>> wire_comb_defs, wire_sync_defs, wire_uses;
 	dict<const RTLIL::Wire*, bool> wire_def_elidable, wire_use_elidable;
 
 	~FlowGraph()
@@ -234,13 +282,17 @@ struct FlowGraph {
 			delete node;
 	}
 
-	void add_defs(Node *node, const RTLIL::SigSpec &sig, bool elidable)
+	void add_defs(Node *node, const RTLIL::SigSpec &sig, bool fully_sync, bool elidable)
 	{
 		for (auto chunk : sig.chunks())
-			if (chunk.wire)
-				wire_defs[chunk.wire].insert(node);
-		// Only defs of an entire wire in the right order can be elided.
-		if (sig.is_wire())
+			if (chunk.wire) {
+				if (fully_sync)
+					wire_sync_defs[chunk.wire].insert(node);
+				else
+					wire_comb_defs[chunk.wire].insert(node);
+			}
+		// Only comb defs of an entire wire in the right order can be elided.
+		if (!fully_sync && sig.is_wire())
 			wire_def_elidable[sig.as_wire()] = elidable;
 	}
 
@@ -268,7 +320,7 @@ struct FlowGraph {
 	// Connections
 	void add_connect_defs_uses(Node *node, const RTLIL::SigSig &conn)
 	{
-		add_defs(node, conn.first, /*elidable=*/true);
+		add_defs(node, conn.first, /*fully_sync=*/false, /*elidable=*/true);
 		add_uses(node, conn.second);
 	}
 
@@ -283,21 +335,59 @@ struct FlowGraph {
 	}
 
 	// Cells
-	void add_cell_defs_uses(Node *node, const RTLIL::Cell *cell)
+	void add_cell_sync_defs(Node *node, const RTLIL::Cell *cell)
+	{
+		// To understand why this node type is necessary and why it produces comb defs, consider a cell
+		// with input \i and sync output \o, used in a design such that \i is connected to \o. This does
+		// not result in a feedback arc because the output is synchronous. However, a naive implementation
+		// of code generation for cells that assigns to inputs, evaluates cells, assigns from outputs
+		// would not be able to immediately converge...
+		//
+		//   wire<1> i_tmp;
+		//   cell->p_i = i_tmp.curr;
+		//   cell->eval();
+		//   i_tmp.next = cell->p_o.curr;
+		//
+		// ... since the wire connecting the input and output ports would not be localizable. To solve
+		// this, the cell is split into two scheduling nodes; one exclusively for sync outputs, and
+		// another for inputs and all non-sync outputs. This way the generated code can be rearranged...
+		//
+		//   value<1> i_tmp;
+		//   i_tmp = cell->p_o.curr;
+		//   cell->p_i = i_tmp;
+		//   cell->eval();
+		//
+		// eliminating the unnecessary delta cycle. Conceptually, the CELL_SYNC node type is a series of
+		// connections of the form `connect \lhs \cell.\sync_output`; the right-hand side of these is not
+		// expressible as a wire in RTLIL. If it were, then `\cell.\sync_output` would have a sync def,
+		// and this node would be an ordinary CONNECT node, with `\lhs` having a comb def. Because it isn't,
+		// a special node type is used, the right-hand side does not appear anywhere, and the left-hand
+		// side has a comb def.
+		for (auto conn : cell->connections())
+			if (cell->output(conn.first))
+				if (is_cxxrtl_sync_port(cell, conn.first)) {
+					// See note regarding elidability below.
+					add_defs(node, conn.second, /*fully_sync=*/false, /*elidable=*/false);
+				}
+	}
+
+	void add_cell_eval_defs_uses(Node *node, const RTLIL::Cell *cell)
 	{
-		log_assert(cell->known());
 		for (auto conn : cell->connections()) {
 			if (cell->output(conn.first)) {
-				if (is_sync_ff_cell(cell->type) || (cell->type == ID($memrd) && cell->getParam(ID::CLK_ENABLE).as_bool()))
-					/* non-combinatorial outputs do not introduce defs */;
-				else if (is_elidable_cell(cell->type))
-					add_defs(node, conn.second, /*elidable=*/true);
+				if (is_elidable_cell(cell->type))
+					add_defs(node, conn.second, /*fully_sync=*/false, /*elidable=*/true);
+				else if (is_sync_ff_cell(cell->type) || (cell->type == ID($memrd) && cell->getParam(ID::CLK_ENABLE).as_bool()))
+					add_defs(node, conn.second, /*fully_sync=*/true,  /*elidable=*/false);
 				else if (is_internal_cell(cell->type))
-					add_defs(node, conn.second, /*elidable=*/false);
-				else {
-					// Unlike outputs of internal cells (which generate code that depends on the ability to set the output
-					// wire bits), outputs of user cells are normal wires, and the wires connected to them can be elided.
-					add_defs(node, conn.second, /*elidable=*/true);
+					add_defs(node, conn.second, /*fully_sync=*/false, /*elidable=*/false);
+				else if (!is_cxxrtl_sync_port(cell, conn.first)) {
+					// Although at first it looks like outputs of user-defined cells may always be elided, the reality is
+					// more complex. Fully sync outputs produce no defs and so don't participate in elision. Fully comb
+					// outputs are assigned in a different way depending on whether the cell's eval() immediately converged.
+					// Unknown/mixed outputs could be elided, but should be rare in practical designs and don't justify
+					// the infrastructure required to elide outputs of cells with many of them.
+					add_defs(node, conn.second, /*fully_sync=*/false, /*elidable=*/false);
 				}
 			}
 			if (cell->input(conn.first))
@@ -307,11 +397,27 @@ struct FlowGraph {
 
 	Node *add_node(const RTLIL::Cell *cell)
 	{
+		log_assert(cell->known());
+
+		bool has_fully_sync_outputs = false;
+		for (auto conn : cell->connections())
+			if (cell->output(conn.first) && is_cxxrtl_sync_port(cell, conn.first)) {
+				has_fully_sync_outputs = true;
+				break;
+			}
+		if (has_fully_sync_outputs) {
+			Node *node = new Node;
+			node->type = Node::Type::CELL_SYNC;
+			node->cell = cell;
+			nodes.push_back(node);
+			add_cell_sync_defs(node, cell);
+		}
+
 		Node *node = new Node;
-		node->type = Node::Type::CELL;
+		node->type = Node::Type::CELL_EVAL;
 		node->cell = cell;
 		nodes.push_back(node);
-		add_cell_defs_uses(node, cell);
+		add_cell_eval_defs_uses(node, cell);
 		return node;
 	}
 
@@ -319,7 +425,7 @@ struct FlowGraph {
 	void add_case_defs_uses(Node *node, const RTLIL::CaseRule *case_)
 	{
 		for (auto &action : case_->actions) {
-			add_defs(node, action.first, /*elidable=*/false);
+			add_defs(node, action.first, /*fully_sync=*/false, /*elidable=*/false);
 			add_uses(node, action.second);
 		}
 		for (auto sub_switch : case_->switches) {
@@ -338,9 +444,9 @@ struct FlowGraph {
 		for (auto sync : process->syncs)
 			for (auto action : sync->actions) {
 				if (sync->type == RTLIL::STp || sync->type == RTLIL::STn || sync->type == RTLIL::STe)
-				  /* sync actions do not introduce feedback */;
+				  add_defs(node, action.first, /*fully_sync=*/true,  /*elidable=*/false);
 				else
-					add_defs(node, action.first, /*elidable=*/false);
+					add_defs(node, action.first, /*fully_sync=*/false, /*elidable=*/false);
 				add_uses(node, action.second);
 			}
 	}
@@ -356,13 +462,6 @@ struct FlowGraph {
 	}
 };
 
-bool is_cxxrtl_blackbox_cell(const RTLIL::Cell *cell)
-{
-	RTLIL::Module *cell_module = cell->module->design->module(cell->type);
-	log_assert(cell_module != nullptr);
-	return cell_module->get_bool_attribute(ID(cxxrtl.blackbox));
-}
-
 std::vector<std::string> split_by(const std::string &str, const std::string &sep)
 {
 	std::vector<std::string> result;
@@ -414,22 +513,24 @@ struct CxxrtlWorker {
 	bool elide_public = false;
 	bool localize_internal = false;
 	bool localize_public = false;
-	bool run_splitnets = false;
+	bool run_opt_clean_purge = false;
+	bool run_proc_flatten = false;
+	bool max_opt_level = false;
 
 	std::ostringstream f;
 	std::string indent;
 	int temporary = 0;
 
 	dict<const RTLIL::Module*, SigMap> sigmaps;
-	pool<const RTLIL::Wire*> sync_wires;
-	dict<RTLIL::SigBit, RTLIL::SyncType> sync_types;
+	pool<const RTLIL::Wire*> edge_wires;
+	dict<RTLIL::SigBit, RTLIL::SyncType> edge_types;
 	pool<const RTLIL::Memory*> writable_memories;
 	dict<const RTLIL::Cell*, pool<const RTLIL::Cell*>> transparent_for;
-	dict<const RTLIL::Cell*, dict<RTLIL::Wire*, RTLIL::IdString>> cell_wire_defs;
 	dict<const RTLIL::Wire*, FlowGraph::Node> elided_wires;
 	dict<const RTLIL::Module*, std::vector<FlowGraph::Node>> schedule;
 	pool<const RTLIL::Wire*> localized_wires;
 	dict<const RTLIL::Module*, pool<std::string>> blackbox_specializations;
+	dict<const RTLIL::Module*, bool> eval_converges;
 
 	void inc_indent() {
 		indent += "\t";
@@ -669,18 +770,14 @@ struct CxxrtlWorker {
 					case FlowGraph::Node::Type::CONNECT:
 						dump_connect_elided(node.connect);
 						break;
-					case FlowGraph::Node::Type::CELL:
-						if (is_elidable_cell(node.cell->type)) {
-							dump_cell_elided(node.cell);
-						} else {
-							const char *access = is_cxxrtl_blackbox_cell(node.cell) ? "->" : ".";
-							f << mangle(node.cell) << access << mangle_wire_name(cell_wire_defs[node.cell][chunk.wire]) << ".curr";
-						}
+					case FlowGraph::Node::Type::CELL_EVAL:
+						log_assert(is_elidable_cell(node.cell->type));
+						dump_cell_elided(node.cell);
 						break;
 					default:
 						log_assert(false);
 				}
-			} else if (localized_wires[chunk.wire]) {
+			} else if (localized_wires[chunk.wire] || is_input_wire(chunk.wire)) {
 				f << mangle(chunk.wire);
 			} else {
 				f << mangle(chunk.wire) << (is_lhs ? ".next" : ".curr");
@@ -740,8 +837,8 @@ struct CxxrtlWorker {
 				case FlowGraph::Node::Type::CONNECT:
 					collect_connect(node.connect, cells);
 					break;
-				case FlowGraph::Node::Type::CELL:
-					collect_cell(node.cell, cells);
+				case FlowGraph::Node::Type::CELL_EVAL:
+					collect_cell_eval(node.cell, cells);
 					break;
 				default:
 					log_assert(false);
@@ -780,6 +877,19 @@ struct CxxrtlWorker {
 		f << ";\n";
 	}
 
+	void dump_cell_sync(const RTLIL::Cell *cell)
+	{
+		const char *access = is_cxxrtl_blackbox_cell(cell) ? "->" : ".";
+		f << indent << "// cell " << cell->name.str() << " syncs\n";
+		for (auto conn : cell->connections())
+			if (cell->output(conn.first))
+				if (is_cxxrtl_sync_port(cell, conn.first)) {
+					f << indent;
+					dump_sigspec_lhs(conn.second);
+					f << " = " << mangle(cell) << access << mangle_wire_name(conn.first) << ".curr;\n";
+				}
+	}
+
 	void dump_cell_elided(const RTLIL::Cell *cell)
 	{
 		// Unary cells
@@ -833,7 +943,7 @@ struct CxxrtlWorker {
 			elided_wires.count(cell->getPort(ID::Y).as_wire());
 	}
 
-	void collect_cell(const RTLIL::Cell *cell, std::vector<RTLIL::IdString> &cells)
+	void collect_cell_eval(const RTLIL::Cell *cell, std::vector<RTLIL::IdString> &cells)
 	{
 		if (!is_cell_elided(cell))
 			return;
@@ -844,7 +954,7 @@ struct CxxrtlWorker {
 				collect_sigspec_rhs(port.second, cells);
 	}
 
-	void dump_cell(const RTLIL::Cell *cell)
+	void dump_cell_eval(const RTLIL::Cell *cell)
 	{
 		if (is_cell_elided(cell))
 			return;
@@ -1088,26 +1198,69 @@ struct CxxrtlWorker {
 			log_assert(cell->known());
 			const char *access = is_cxxrtl_blackbox_cell(cell) ? "->" : ".";
 			for (auto conn : cell->connections())
-				if (cell->input(conn.first)) {
+				if (cell->input(conn.first) && !cell->output(conn.first)) {
+					f << indent << mangle(cell) << access << mangle_wire_name(conn.first) << " = ";
+					dump_sigspec_rhs(conn.second);
+					f << ";\n";
+					if (getenv("CXXRTL_VOID_MY_WARRANTY")) {
+						// Until we have proper clock tree detection, this really awful hack that opportunistically
+						// propagates prev_* values for clocks can be used to estimate how much faster a design could
+						// be if only one clock edge was simulated by replacing:
+						//   top.p_clk = value<1>{0u}; top.step();
+						//   top.p_clk = value<1>{1u}; top.step();
+						// with:
+						//   top.prev_p_clk = value<1>{0u}; top.p_clk = value<1>{1u}; top.step();
+						// Don't rely on this; it will be removed without warning.
+						RTLIL::Module *cell_module = cell->module->design->module(cell->type);
+						if (cell_module != nullptr && cell_module->wire(conn.first) && conn.second.is_wire()) {
+							RTLIL::Wire *cell_module_wire = cell_module->wire(conn.first);
+							if (edge_wires[conn.second.as_wire()] && edge_wires[cell_module_wire]) {
+								f << indent << mangle(cell) << access << "prev_" << mangle(cell_module_wire) << " = ";
+								f << "prev_" << mangle(conn.second.as_wire()) << ";\n";
+							}
+						}
+					}
+				} else if (cell->input(conn.first)) {
 					f << indent << mangle(cell) << access << mangle_wire_name(conn.first) << ".next = ";
 					dump_sigspec_rhs(conn.second);
 					f << ";\n";
 				}
-			f << indent << mangle(cell) << access << "eval();\n";
-			for (auto conn : cell->connections()) {
-				if (conn.second.is_wire()) {
-					RTLIL::Wire *wire = conn.second.as_wire();
-					if (elided_wires.count(wire) && cell_wire_defs[cell].count(wire))
-						continue;
+			auto assign_from_outputs = [&](bool cell_converged) {
+				for (auto conn : cell->connections()) {
+					if (cell->output(conn.first)) {
+						if (conn.second.empty())
+							continue; // ignore disconnected ports
+						if (is_cxxrtl_sync_port(cell, conn.first))
+							continue; // fully sync ports are handled in CELL_SYNC nodes
+						f << indent;
+						dump_sigspec_lhs(conn.second);
+						f << " = " << mangle(cell) << access << mangle_wire_name(conn.first);
+						// Similarly to how there is no purpose to buffering cell inputs, there is also no purpose to buffering
+						// combinatorial cell outputs in case the cell converges within one cycle. (To convince yourself that
+						// this optimization is valid, consider that, since the cell converged within one cycle, it would not
+						// have any buffered wires if they were not output ports. Imagine inlining the cell's eval() function,
+						// and consider the fate of the localized wires that used to be output ports.)
+						//
+						// Unlike cell inputs (which are never buffered), it is not possible to know a priori whether the cell
+						// (which may be late bound) will converge immediately. Because of this, the choice between using .curr
+						// (appropriate for buffered outputs) and .next (appropriate for unbuffered outputs) is made at runtime.
+						if (cell_converged && is_cxxrtl_comb_port(cell, conn.first))
+							f << ".next;\n";
+						else
+							f << ".curr;\n";
+					}
 				}
-				if (cell->output(conn.first)) {
-					if (conn.second.empty())
-						continue; // ignore disconnected ports
-					f << indent;
-					dump_sigspec_lhs(conn.second);
-					f << " = " << mangle(cell) << access << mangle_wire_name(conn.first) << ".curr;\n";
-				}
-			}
+			};
+			f << indent << "if (" << mangle(cell) << access << "eval()) {\n";
+			inc_indent();
+				assign_from_outputs(/*cell_converged=*/true);
+			dec_indent();
+			f << indent << "} else {\n";
+			inc_indent();
+				f << indent << "converged = false;\n";
+				assign_from_outputs(/*cell_converged=*/false);
+			dec_indent();
+			f << indent << "}\n";
 		}
 	}
 
@@ -1253,21 +1406,17 @@ struct CxxrtlWorker {
 		}
 	}
 
-	void dump_wire(const RTLIL::Wire *wire, bool is_local)
+	void dump_wire(const RTLIL::Wire *wire, bool is_local_context)
 	{
 		if (elided_wires.count(wire))
 			return;
+		if (localized_wires.count(wire) != is_local_context)
+			return;
 
-		if (is_local) {
-			if (!localized_wires.count(wire))
-				return;
-
+		if (is_local_context) {
 			dump_attrs(wire);
 			f << indent << "value<" << wire->width << "> " << mangle(wire) << ";\n";
 		} else {
-			if (localized_wires.count(wire))
-				return;
-
 			std::string width;
 			if (wire->module->has_attribute(ID(cxxrtl.blackbox)) && wire->has_attribute(ID(cxxrtl.width))) {
 				width = wire->get_string_attribute(ID(cxxrtl.width));
@@ -1276,19 +1425,47 @@ struct CxxrtlWorker {
 			}
 
 			dump_attrs(wire);
-			f << indent << "wire<" << width << "> " << mangle(wire);
+			f << indent << (is_input_wire(wire) ? "value" : "wire") << "<" << width << "> " << mangle(wire);
 			if (wire->has_attribute(ID::init)) {
 				f << " ";
 				dump_const_init(wire->attributes.at(ID::init));
 			}
 			f << ";\n";
-			if (sync_wires[wire]) {
-				for (auto sync_type : sync_types) {
-					if (sync_type.first.wire == wire) {
-						if (sync_type.second != RTLIL::STn)
-							f << indent << "bool posedge_" << mangle(sync_type.first) << " = false;\n";
-						if (sync_type.second != RTLIL::STp)
-							f << indent << "bool negedge_" << mangle(sync_type.first) << " = false;\n";
+			if (edge_wires[wire]) {
+				if (is_input_wire(wire)) {
+					f << indent << "value<" << width << "> prev_" << mangle(wire);
+					if (wire->has_attribute(ID::init)) {
+						f << " ";
+						dump_const_init(wire->attributes.at(ID::init));
+					}
+					f << ";\n";
+				}
+				for (auto edge_type : edge_types) {
+					if (edge_type.first.wire == wire) {
+						std::string prev, next;
+						if (is_input_wire(wire)) {
+							prev = "prev_" + mangle(edge_type.first.wire);
+							next =           mangle(edge_type.first.wire);
+						} else {
+							prev = mangle(edge_type.first.wire) + ".curr";
+							next = mangle(edge_type.first.wire) + ".next";
+						}
+						prev += ".slice<" + std::to_string(edge_type.first.offset) + ">().val()";
+						next += ".slice<" + std::to_string(edge_type.first.offset) + ">().val()";
+						if (edge_type.second != RTLIL::STn) {
+							f << indent << "bool posedge_" << mangle(edge_type.first) << "() const {\n";
+							inc_indent();
+								f << indent << "return !" << prev << " && " << next << ";\n";
+							dec_indent();
+							f << indent << "}\n";
+						}
+						if (edge_type.second != RTLIL::STp) {
+							f << indent << "bool negedge_" << mangle(edge_type.first) << "() const {\n";
+							inc_indent();
+								f << indent << "return " << prev << " && !" << next << ";\n";
+							dec_indent();
+							f << indent << "}\n";
+						}
 					}
 				}
 			}
@@ -1343,16 +1520,36 @@ struct CxxrtlWorker {
 	void dump_eval_method(RTLIL::Module *module)
 	{
 		inc_indent();
+			f << indent << "bool converged = " << (eval_converges.at(module) ? "true" : "false") << ";\n";
 			if (!module->get_bool_attribute(ID(cxxrtl.blackbox))) {
+				for (auto wire : module->wires()) {
+					if (edge_wires[wire]) {
+						for (auto edge_type : edge_types) {
+							if (edge_type.first.wire == wire) {
+								if (edge_type.second != RTLIL::STn) {
+									f << indent << "bool posedge_" << mangle(edge_type.first) << " = ";
+									f << "this->posedge_" << mangle(edge_type.first) << "();\n";
+								}
+								if (edge_type.second != RTLIL::STp) {
+									f << indent << "bool negedge_" << mangle(edge_type.first) << " = ";
+									f << "this->negedge_" << mangle(edge_type.first) << "();\n";
+								}
+							}
+						}
+					}
+				}
 				for (auto wire : module->wires())
-					dump_wire(wire, /*is_local=*/true);
+					dump_wire(wire, /*is_local_context=*/true);
 				for (auto node : schedule[module]) {
 					switch (node.type) {
 						case FlowGraph::Node::Type::CONNECT:
 							dump_connect(node.connect);
 							break;
-						case FlowGraph::Node::Type::CELL:
-							dump_cell(node.cell);
+						case FlowGraph::Node::Type::CELL_SYNC:
+							dump_cell_sync(node.cell);
+							break;
+						case FlowGraph::Node::Type::CELL_EVAL:
+							dump_cell_eval(node.cell);
 							break;
 						case FlowGraph::Node::Type::PROCESS:
 							dump_process(node.process);
@@ -1360,14 +1557,7 @@ struct CxxrtlWorker {
 					}
 				}
 			}
-			for (auto sync_type : sync_types) {
-				if (sync_type.first.wire->module == module) {
-					if (sync_type.second != RTLIL::STn)
-						f << indent << "posedge_" << mangle(sync_type.first) << " = false;\n";
-					if (sync_type.second != RTLIL::STp)
-						f << indent << "negedge_" << mangle(sync_type.first) << " = false;\n";
-				}
-			}
+			f << indent << "return converged;\n";
 		dec_indent();
 	}
 
@@ -1378,39 +1568,13 @@ struct CxxrtlWorker {
 			for (auto wire : module->wires()) {
 				if (elided_wires.count(wire) || localized_wires.count(wire))
 					continue;
-				if (sync_wires[wire]) {
-					std::string wire_prev = mangle(wire) + "_prev";
-					std::string wire_curr = mangle(wire) + ".curr";
-					std::string wire_edge = mangle(wire) + "_edge";
-					f << indent << "value<" << wire->width << "> " << wire_prev << " = " << wire_curr << ";\n";
-					f << indent << "if (" << mangle(wire) << ".commit()) {\n";
-					inc_indent();
-						f << indent << "value<" << wire->width << "> " << wire_edge << " = "
-						            << wire_prev << ".bit_xor(" << wire_curr << ");\n";
-						for (auto sync_type : sync_types) {
-							if (sync_type.first.wire != wire)
-								continue;
-							if (sync_type.second != RTLIL::STn) {
-								f << indent << "if (" << wire_edge << ".slice<" << sync_type.first.offset << ">().val() && "
-								            << wire_curr << ".slice<" << sync_type.first.offset << ">().val())\n";
-								inc_indent();
-									f << indent << "posedge_" << mangle(sync_type.first) << " = true;\n";
-								dec_indent();
-							}
-							if (sync_type.second != RTLIL::STp) {
-								f << indent << "if (" << wire_edge << ".slice<" << sync_type.first.offset << ">().val() && "
-								            << "!" << wire_curr << ".slice<" << sync_type.first.offset << ">().val())\n";
-								inc_indent();
-									f << indent << "negedge_" << mangle(sync_type.first) << " = true;\n";
-								dec_indent();
-							}
-							f << indent << "changed = true;\n";
-						}
-					dec_indent();
-					f << indent << "}\n";
-				} else if (!module->get_bool_attribute(ID(cxxrtl.blackbox)) || wire->port_id != 0) {
-					f << indent << "changed |= " << mangle(wire) << ".commit();\n";
+				if (is_input_wire(wire)) {
+					if (edge_wires[wire])
+						f << indent << "prev_" << mangle(wire) << " = " << mangle(wire) << ";\n";
+					continue;
 				}
+				if (!module->get_bool_attribute(ID(cxxrtl.blackbox)) || wire->port_id != 0)
+					f << indent << "changed |= " << mangle(wire) << ".commit();\n";
 			}
 			if (!module->get_bool_attribute(ID(cxxrtl.blackbox))) {
 				for (auto memory : module->memories) {
@@ -1466,10 +1630,10 @@ struct CxxrtlWorker {
 			inc_indent();
 				for (auto wire : module->wires()) {
 					if (wire->port_id != 0)
-						dump_wire(wire, /*is_local=*/false);
+						dump_wire(wire, /*is_local_context=*/false);
 				}
 				f << "\n";
-				f << indent << "void eval() override {\n";
+				f << indent << "bool eval() override {\n";
 				dump_eval_method(module);
 				f << indent << "}\n";
 				f << "\n";
@@ -1506,7 +1670,7 @@ struct CxxrtlWorker {
 			f << indent << "struct " << mangle(module) << " : public module {\n";
 			inc_indent();
 				for (auto wire : module->wires())
-					dump_wire(wire, /*is_local=*/false);
+					dump_wire(wire, /*is_local_context=*/false);
 				f << "\n";
 				bool has_memories = false;
 				for (auto memory : module->memories) {
@@ -1537,7 +1701,7 @@ struct CxxrtlWorker {
 				}
 				if (has_cells)
 					f << "\n";
-				f << indent << "void eval() override;\n";
+				f << indent << "bool eval() override;\n";
 				f << indent << "bool commit() override;\n";
 			dec_indent();
 			f << indent << "}; // struct " << mangle(module) << "\n";
@@ -1549,7 +1713,7 @@ struct CxxrtlWorker {
 	{
 		if (module->get_bool_attribute(ID(cxxrtl.blackbox)))
 			return;
-		f << indent << "void " << mangle(module) << "::eval() {\n";
+		f << indent << "bool " << mangle(module) << "::eval() {\n";
 		dump_eval_method(module);
 		f << indent << "}\n";
 		f << "\n";
@@ -1638,16 +1802,18 @@ struct CxxrtlWorker {
 		log_assert(type == RTLIL::STp || type == RTLIL::STn || type == RTLIL::STe);
 
 		RTLIL::SigBit sigbit = signal[0];
-		if (!sync_types.count(sigbit))
-			sync_types[sigbit] = type;
-		else if (sync_types[sigbit] != type)
-			sync_types[sigbit] = RTLIL::STe;
-		sync_wires.insert(signal.as_wire());
+		if (!edge_types.count(sigbit))
+			edge_types[sigbit] = type;
+		else if (edge_types[sigbit] != type)
+			edge_types[sigbit] = RTLIL::STe;
+		edge_wires.insert(signal.as_wire());
 	}
 
 	void analyze_design(RTLIL::Design *design)
 	{
 		bool has_feedback_arcs = false;
+		bool has_buffered_wires = false;
+
 		for (auto module : design->modules()) {
 			if (!design->selected_module(module))
 				continue;
@@ -1680,6 +1846,10 @@ struct CxxrtlWorker {
 						}
 					}
 				}
+
+				// Black boxes converge by default, since their implementations are quite unlikely to require
+				// internal propagation of comb signals.
+				eval_converges[module] = true;
 				continue;
 			}
 
@@ -1788,23 +1958,15 @@ struct CxxrtlWorker {
 				if (wire->get_bool_attribute(ID::keep)) continue;
 				if (wire->name.begins_with("$") && !elide_internal) continue;
 				if (wire->name.begins_with("\\") && !elide_public) continue;
-				if (sync_wires[wire]) continue;
-				log_assert(flow.wire_defs[wire].size() == 1);
-				elided_wires[wire] = **flow.wire_defs[wire].begin();
+				if (edge_wires[wire]) continue;
+				log_assert(flow.wire_comb_defs[wire].size() == 1);
+				elided_wires[wire] = **flow.wire_comb_defs[wire].begin();
 			}
 
-			// Elided wires that are outputs of internal cells are always connected to a well known port (Y).
-			// For user cells, there could be multiple of them, and we need a way to look up the port name
-			// knowing only the wire.
-			for (auto cell : module->cells())
-				for (auto conn : cell->connections())
-					if (conn.second.is_wire() && elided_wires.count(conn.second.as_wire()))
-						cell_wire_defs[cell][conn.second.as_wire()] = conn.first;
-
 			dict<FlowGraph::Node*, pool<const RTLIL::Wire*>, hash_ptr_ops> node_defs;
-			for (auto wire_def : flow.wire_defs)
-				for (auto node : wire_def.second)
-					node_defs[node].insert(wire_def.first);
+			for (auto wire_comb_def : flow.wire_comb_defs)
+				for (auto node : wire_comb_def.second)
+					node_defs[node].insert(wire_comb_def.first);
 
 			Scheduler<FlowGraph::Node> scheduler;
 			dict<FlowGraph::Node*, Scheduler<FlowGraph::Node>::Vertex*, hash_ptr_ops> node_map;
@@ -1843,10 +2005,9 @@ struct CxxrtlWorker {
 
 			if (!feedback_wires.empty()) {
 				has_feedback_arcs = true;
-				log("Module `%s' contains feedback arcs through wires:\n", module->name.c_str());
-				for (auto wire : feedback_wires) {
-					log("  %s\n", wire->name.c_str());
-				}
+				log("Module `%s' contains feedback arcs through wires:\n", log_id(module));
+				for (auto wire : feedback_wires)
+					log("  %s\n", log_id(wire));
 			}
 
 			for (auto wire : module->wires()) {
@@ -1855,14 +2016,46 @@ struct CxxrtlWorker {
 				if (wire->get_bool_attribute(ID::keep)) continue;
 				if (wire->name.begins_with("$") && !localize_internal) continue;
 				if (wire->name.begins_with("\\") && !localize_public) continue;
-				if (sync_wires[wire]) continue;
-				// Outputs of FF/$memrd cells and LHS of sync actions do not end up in defs.
-				if (flow.wire_defs[wire].size() != 1) continue;
+				if (edge_wires[wire]) continue;
+				if (flow.wire_sync_defs.count(wire) > 0) continue;
 				localized_wires.insert(wire);
 			}
+
+			// For maximum performance, the state of the simulation (which is the same as the set of its double buffered
+			// wires, since using a singly buffered wire for any kind of state introduces a race condition) should contain
+			// no wires attached to combinatorial outputs. Feedback wires, by definition, make that impossible. However,
+			// it is possible that a design with no feedback arcs would end up with doubly buffered wires in such cases
+			// as a wire with multiple drivers where one of them is combinatorial and the other is synchronous. Such designs
+			// also require more than one delta cycle to converge.
+			pool<const RTLIL::Wire*> buffered_wires;
+			for (auto wire : module->wires()) {
+				if (flow.wire_comb_defs[wire].size() > 0 && !elided_wires.count(wire) && !localized_wires[wire]) {
+					if (!feedback_wires[wire])
+						buffered_wires.insert(wire);
+				}
+			}
+			if (!buffered_wires.empty()) {
+				has_buffered_wires = true;
+				log("Module `%s' contains buffered combinatorial wires:\n", log_id(module));
+				for (auto wire : buffered_wires)
+					log("  %s\n", log_id(wire));
+			}
+
+			eval_converges[module] = feedback_wires.empty() && buffered_wires.empty();
 		}
-		if (has_feedback_arcs) {
-			log("Feedback arcs require delta cycles during evaluation.\n");
+		if (has_feedback_arcs || has_buffered_wires) {
+			// Although both non-feedback buffered combinatorial wires and apparent feedback wires may be eliminated
+			// by optimizing the design, if after `opt_clean -purge` there are any feedback wires remaining, it is very
+			// likely that these feedback wires are indicative of a true logic loop, so they get emphasized in the message.
+			const char *why_pessimistic = nullptr;
+			if (has_feedback_arcs)
+				why_pessimistic = "feedback wires";
+			else if (has_buffered_wires)
+				why_pessimistic = "buffered combinatorial wires";
+			log("\n");
+			log_warning("Design contains %s, which require delta cycles during evaluation.\n", why_pessimistic);
+			if (!max_opt_level)
+				log("Increasing the optimization level may eliminate %s from the design.\n", why_pessimistic);
 		}
 	}
 
@@ -1894,8 +2087,12 @@ struct CxxrtlWorker {
 	void prepare_design(RTLIL::Design *design)
 	{
 		bool has_sync_init, has_packed_mem;
+		log_push();
 		check_design(design, has_sync_init, has_packed_mem);
-		if (has_sync_init) {
+		if (run_proc_flatten) {
+			Pass::call(design, "proc");
+			Pass::call(design, "flatten");
+		} else if (has_sync_init) {
 			// We're only interested in proc_init, but it depends on proc_prune and proc_clean, so call those
 			// in case they weren't already. (This allows `yosys foo.v -o foo.cc` to work.)
 			Pass::call(design, "proc_prune");
@@ -1908,18 +2105,15 @@ struct CxxrtlWorker {
 		if (has_sync_init || has_packed_mem)
 			check_design(design, has_sync_init, has_packed_mem);
 		log_assert(!(has_sync_init || has_packed_mem));
-
-		if (run_splitnets) {
-			Pass::call(design, "splitnets -driver");
+		if (run_opt_clean_purge)
 			Pass::call(design, "opt_clean -purge");
-		}
-		log("\n");
+		log_pop();
 		analyze_design(design);
 	}
 };
 
 struct CxxrtlBackend : public Backend {
-	static const int DEFAULT_OPT_LEVEL = 5;
+	static const int DEFAULT_OPT_LEVEL = 6;
 
 	CxxrtlBackend() : Backend("cxxrtl", "convert design to C++ RTL simulation") { }
 	void help() YS_OVERRIDE
@@ -1941,9 +2135,9 @@ struct CxxrtlBackend : public Backend {
 		log("      top.step();\n");
 		log("      while (1) {\n");
 		log("        /* user logic */\n");
-		log("        top.p_clk.next = value<1> {0u};\n");
+		log("        top.p_clk = value<1> {0u};\n");
 		log("        top.step();\n");
-		log("        top.p_clk.next = value<1> {1u};\n");
+		log("        top.p_clk = value<1> {1u};\n");
 		log("        top.step();\n");
 		log("      }\n");
 		log("    }\n");
@@ -1965,18 +2159,20 @@ struct CxxrtlBackend : public Backend {
 		log("    module debug(...);\n");
 		log("      (* cxxrtl.edge = \"p\" *) input clk;\n");
 		log("      input en;\n");
-		log("      input [7:0] data;\n");
+		log("      input [7:0] i_data;\n");
+		log("      (* cxxrtl.sync *) output [7:0] o_data;\n");
 		log("    endmodule\n");
 		log("\n");
 		log("For this HDL interface, this backend will generate the following C++ interface:\n");
 		log("\n");
 		log("    struct bb_p_debug : public module {\n");
-		log("      wire<1> p_clk;\n");
-		log("      bool posedge_p_clk = false;\n");
-		log("      wire<1> p_en;\n");
-		log("      wire<8> p_data;\n");
+		log("      value<1> p_clk;\n");
+		log("      bool posedge_p_clk() const { /* ... */ }\n");
+		log("      value<1> p_en;\n");
+		log("      value<8> p_i_data;\n");
+		log("      wire<8> p_o_data;\n");
 		log("\n");
-		log("      void eval() override;\n");
+		log("      bool eval() override;\n");
 		log("      bool commit() override;\n");
 		log("\n");
 		log("      static std::unique_ptr<bb_p_debug>\n");
@@ -1989,10 +2185,11 @@ struct CxxrtlBackend : public Backend {
 		log("    namespace cxxrtl_design {\n");
 		log("\n");
 		log("    struct stderr_debug : public bb_p_debug {\n");
-		log("      void eval() override {\n");
-		log("        if (posedge_p_clk && p_en.curr)\n");
-		log("          fprintf(stderr, \"debug: %%02x\\n\", p_data.curr.data[0]);\n");
-		log("        bb_p_debug::eval();\n");
+		log("      bool eval() override {\n");
+		log("        if (posedge_p_clk() && p_en)\n");
+		log("          fprintf(stderr, \"debug: %%02x\\n\", p_i_data.data[0]);\n");
+		log("        p_o_data.next = p_i_data;\n");
+		log("        return bb_p_debug::eval();\n");
 		log("      }\n");
 		log("    };\n");
 		log("\n");
@@ -2013,7 +2210,8 @@ struct CxxrtlBackend : public Backend {
 		log("      parameter WIDTH = 8;\n");
 		log("      (* cxxrtl.edge = \"p\" *) input clk;\n");
 		log("      input en;\n");
-		log("      (* cxxrtl.width = \"WIDTH\" *) input [WIDTH - 1:0] data;\n");
+		log("      (* cxxrtl.width = \"WIDTH\" *) input [WIDTH - 1:0] i_data;\n");
+		log("      (* cxxrtl.width = \"WIDTH\" *) output [WIDTH - 1:0] o_data;\n");
 		log("    endmodule\n");
 		log("\n");
 		log("For this parametric HDL interface, this backend will generate the following C++\n");
@@ -2022,7 +2220,8 @@ struct CxxrtlBackend : public Backend {
 		log("    template<size_t WIDTH>\n");
 		log("    struct bb_p_debug : public module {\n");
 		log("      // ...\n");
-		log("      wire<WIDTH> p_data;\n");
+		log("      value<WIDTH> p_i_data;\n");
+		log("      wire<WIDTH> p_o_data;\n");
 		log("      // ...\n");
 		log("      static std::unique_ptr<bb_p_debug<WIDTH>>\n");
 		log("      create(std::string name, metadata_map parameters, metadata_map attributes);\n");
@@ -2053,10 +2252,9 @@ struct CxxrtlBackend : public Backend {
 		log("\n");
 		log("    cxxrtl.edge\n");
 		log("        only valid on inputs of black boxes. must be one of \"p\", \"n\", \"a\".\n");
-		log("        if specified on signal `clk`, the generated code includes boolean fields\n");
-		log("        `posedge_p_clk` (if \"p\"), `negedge_p_clk` (if \"n\"), or both (if \"a\"),\n");
-		log("        as well as edge detection logic, simplifying implementation of clocked\n");
-		log("        black boxes.\n");
+		log("        if specified on signal `clk`, the generated code includes edge detectors\n");
+		log("        `posedge_p_clk()` (if \"p\"), `negedge_p_clk()` (if \"n\"), or both (if\n");
+		log("        \"a\"), simplifying implementation of clocked black boxes.\n");
 		log("\n");
 		log("    cxxrtl.template\n");
 		log("        only valid on black boxes. must contain a space separated sequence of\n");
@@ -2067,6 +2265,13 @@ struct CxxrtlBackend : public Backend {
 		log("        only valid on ports of black boxes. must be a constant expression, which\n");
 		log("        is directly inserted into generated code.\n");
 		log("\n");
+		log("    cxxrtl.comb, cxxrtl.sync\n");
+		log("        only valid on outputs of black boxes. if specified, indicates that every\n");
+		log("        bit of the output port is driven, correspondingly, by combinatorial or\n");
+		log("        synchronous logic. this knowledge is used for scheduling optimizations.\n");
+		log("        if neither is specified, the output will be pessimistically treated as\n");
+		log("        driven by both combinatorial and synchronous logic.\n");
+		log("\n");
 		log("The following options are supported by this backend:\n");
 		log("\n");
 		log("    -header\n");
@@ -2100,7 +2305,10 @@ struct CxxrtlBackend : public Backend {
 		log("        like -O3, and localize public wires not marked (*keep*) if possible.\n");
 		log("\n");
 		log("    -O5\n");
-		log("        like -O4, and run `splitnets -driver; opt_clean -purge` first.\n");
+		log("        like -O4, and run `opt_clean -purge` first.\n");
+		log("\n");
+		log("    -O6\n");
+		log("        like -O5, and run `proc; flatten` first.\n");
 		log("\n");
 	}
 	void execute(std::ostream *&f, std::string filename, std::vector<std::string> args, RTLIL::Design *design) YS_OVERRIDE
@@ -2134,8 +2342,11 @@ struct CxxrtlBackend : public Backend {
 		extra_args(f, filename, args, argidx);
 
 		switch (opt_level) {
+			case 6:
+				worker.max_opt_level = true;
+				worker.run_proc_flatten = true;
 			case 5:
-				worker.run_splitnets = true;
+				worker.run_opt_clean_purge = true;
 			case 4:
 				worker.localize_public = true;
 			case 3:
diff --git a/backends/cxxrtl/cxxrtl.h b/backends/cxxrtl/cxxrtl.h
index 41e6290d1..b79bbbc72 100644
--- a/backends/cxxrtl/cxxrtl.h
+++ b/backends/cxxrtl/cxxrtl.h
@@ -717,15 +717,16 @@ struct module {
 	module(const module &) = delete;
 	module &operator=(const module &) = delete;
 
-	virtual void eval() = 0;
+	virtual bool eval() = 0;
 	virtual bool commit() = 0;
 
 	size_t step() {
 		size_t deltas = 0;
+		bool converged = false; // holds the result of the most recent eval() call
 		do {
-			eval();
+			converged = eval(); // keep eval()'s boolean result as the convergence flag for this delta cycle
 			deltas++;
-		} while (commit());
+		} while (commit() && !converged); // commit() always runs first; iterate again only if it returns true and eval() did not report convergence
 		return deltas;
 	}
 };