From 06f8f2654abdef8684bfe4f373ac42cb8c62ee2a Mon Sep 17 00:00:00 2001
From: Eddie Hung <eddieh@ece.ubc.ca>
Date: Fri, 15 Mar 2019 19:13:40 -0700
Subject: [PATCH] shregmap: add -tech xilinx for variable-length shift registers

---
 passes/techmap/shregmap.cc      | 655 +++++++++++++++++++-------------
 techlibs/xilinx/cells_map.v     | 109 ++++--
 techlibs/xilinx/synth_xilinx.cc |  16 +-
 3 files changed, 470 insertions(+), 310 deletions(-)

diff --git a/passes/techmap/shregmap.cc b/passes/techmap/shregmap.cc
index f20863ba0..4b8f8a828 100644
--- a/passes/techmap/shregmap.cc
+++ b/passes/techmap/shregmap.cc
@@ -26,7 +26,9 @@ PRIVATE_NAMESPACE_BEGIN
 struct ShregmapTech
 {
 	virtual ~ShregmapTech() { }
-	virtual bool analyze(vector<int> &taps) = 0;
+	virtual void init(const Module * /*module*/, const SigMap &/*sigmap*/) {} // optional hook, no-op by default; run once per module before chains are collected
+	virtual void non_chain_user(const SigBit &/*bit*/, const Cell* /*cell*/, IdString /*port*/) {} // optional hook, no-op by default; bit is an input of a cell not taken as a chain FF
+	virtual bool analyze(vector<int> &taps, const vector<SigBit> &qbits) = 0;
 	virtual bool fixup(Cell *cell, dict<int, SigBit> &taps) = 0;
 };
 
@@ -54,7 +56,7 @@ struct ShregmapOptions
 
 struct ShregmapTechGreenpak4 : ShregmapTech
 {
-	bool analyze(vector<int> &taps)
+	bool analyze(vector<int> &taps, const vector<SigBit> &/*qbits*/)
 	{
 		if (GetSize(taps) > 2 && taps[0] == 0 && taps[2] < 17) {
 			taps.clear();
@@ -91,302 +93,423 @@ struct ShregmapTechGreenpak4 : ShregmapTech
 	}
 };
 
+struct ShregmapTechXilinx7 : ShregmapTech
+{
+	// Bits on the A input of 1-bit-wide $shiftx cells, mapped to that cell;
+	// set to nullptr once the bit is seen to have any other non-chain user
+	dict<SigBit, Cell*> sigbit_to_shiftx;
+	const ShregmapOptions &opts;
+
+	ShregmapTechXilinx7(const ShregmapOptions &opts) : opts(opts) {}
+
+	// Record every (sigmapped) bit that feeds A of a $shiftx with Y_WIDTH == 1
+	virtual void init(const Module* module, const SigMap &sigmap) override
+	{
+		for (auto i : module->cells_) {
+			auto cell = i.second;
+			if (cell->type != "$shiftx") continue;
+			if (cell->getParam("\\Y_WIDTH") != 1) continue;
+			for (auto bit : sigmap(cell->getPort("\\A")))
+				sigbit_to_shiftx[bit] = cell;
+		}
+	}
+
+	// A recorded bit read by anything but the A port of a $shiftx is marked nullptr
+	virtual void non_chain_user(const SigBit &bit, const Cell *cell, IdString port) override
+	{
+		auto it = sigbit_to_shiftx.find(bit);
+		if (it == sigbit_to_shiftx.end())
+			return;
+		if (cell->type == "$shiftx" && port == "\\A")
+			return;
+		it->second = nullptr;
+	}
+
+	// Accept a lone tap on a long enough chain, or taps 0..N-1 that are
+	// consecutive inputs of one $shiftx and have no other users
+	virtual bool analyze(vector<int> &taps, const vector<SigBit> &qbits) override
+	{
+		if (GetSize(taps) == 1)
+			return taps[0] >= opts.minlen-1;
+
+		if (taps.back() < opts.minlen-1)
+			return false;
+
+		Cell *shiftx = nullptr;
+		int offset = 0;
+		for (int i = 0; i < GetSize(taps); ++i) {
+			// Check taps are sequential
+			if (i != taps[i])
+				return false;
+			// fixup() only rewires $shiftx inputs, so a tap without a
+			// valid entry (absent, or nullptr for other users) would
+			// lose its driver when the chain is collapsed
+			auto it = sigbit_to_shiftx.find(qbits[i]);
+			if (it == sigbit_to_shiftx.end() || it->second == nullptr)
+				return false;
+			if (i == 0) {
+				shiftx = it->second;
+				offset = qbits[i].offset;
+			}
+			// Later taps: same $shiftx, consecutive bit offsets
+			else if (shiftx != it->second || qbits[i].offset != offset + i)
+				return false;
+		}
+
+		return true;
+	}
+
+	// Fold the $shiftx over the taps into this cell's variable-length L input
+	virtual bool fixup(Cell *cell, dict<int, SigBit> &taps) override
+	{
+		// A lone tap is a fixed-length register (and would need a 0-bit L)
+		if (taps.size() < 2)
+			return true;
+
+		const auto &tap = *taps.begin();
+		auto bit = tap.second;
+		auto it = sigbit_to_shiftx.find(bit);
+		if (it == sigbit_to_shiftx.end() || it->second == nullptr)
+			return true;
+
+		Cell* shiftx = it->second;
+		auto module = cell->module;
+		auto cell_q = cell->getPort("\\Q").as_bit();
+		const int num_taps = taps.size();
+
+		auto shiftx_a = shiftx->getPort("\\A").bits();
+		int offset = 0;
+		for (auto a_bit : shiftx_a) {
+			if (a_bit == cell_q)
+				break;
+			++offset;
+		}
+		// cell_q must be a $shiftx input, else the writes below overrun
+		log_assert(offset < GetSize(shiftx_a));
+		offset -= num_taps - 1;
+		log_assert(offset >= 0);
+		for (int i = offset; i < offset + num_taps; ++i)
+			shiftx_a[i] = cell_q;
+		// FIXME: Hack to ensure that $shiftx gets optimised away
+		//   Without this, Yosys will refuse to optimise away a $shiftx
+		//   where \\A 's width is not perfectly 2 ** \\B_WIDTH
+		auto shiftx_bwidth = shiftx->getParam("\\B_WIDTH").as_int();
+		shiftx_a.resize(1 << shiftx_bwidth, shiftx_a.back());
+		shiftx->setPort("\\A", shiftx_a);
+		shiftx->setParam("\\A_WIDTH", shiftx_a.size());
+
+		auto length = module->addWire(NEW_ID, ceil(log2(taps.size())));
+		// ceil(log2(offset)) bits cannot hold 0 or any power of two: use 32
+		module->addSub(NEW_ID, shiftx->getPort("\\B"), RTLIL::Const(offset, 32), length);
+		cell->setPort("\\L", length);
+		return true;
+	}
+};
+
+
 struct ShregmapWorker
 {
-	Module *module;
-	SigMap sigmap;
+    Module *module;
+    SigMap sigmap;
 
-	const ShregmapOptions &opts;
-	int dff_count, shreg_count;
+    const ShregmapOptions &opts;
+    int dff_count, shreg_count;
 
-	pool<Cell*> remove_cells;
-	pool<SigBit> remove_init;
+    pool<Cell*> remove_cells;
+    pool<SigBit> remove_init;
 
-	dict<SigBit, bool> sigbit_init;
-	dict<SigBit, Cell*> sigbit_chain_next;
-	dict<SigBit, Cell*> sigbit_chain_prev;
-	pool<SigBit> sigbit_with_non_chain_users;
-	pool<Cell*> chain_start_cells;
+    dict<SigBit, bool> sigbit_init;
+    dict<SigBit, Cell*> sigbit_chain_next;
+    dict<SigBit, Cell*> sigbit_chain_prev;
+    pool<SigBit> sigbit_with_non_chain_users;
+    pool<Cell*> chain_start_cells;
 
-	void make_sigbit_chain_next_prev()
+    void make_sigbit_chain_next_prev()
+    {
+	for (auto wire : module->wires())
 	{
-		for (auto wire : module->wires())
-		{
-			if (wire->port_output || wire->get_bool_attribute("\\keep")) {
-				for (auto bit : sigmap(wire))
-					sigbit_with_non_chain_users.insert(bit);
-			}
+	    if (wire->port_output || wire->get_bool_attribute("\\keep")) {
+		for (auto bit : sigmap(wire))
+		    sigbit_with_non_chain_users.insert(bit);
+	    }
 
-			if (wire->attributes.count("\\init")) {
-				SigSpec initsig = sigmap(wire);
-				Const initval = wire->attributes.at("\\init");
-				for (int i = 0; i < GetSize(initsig) && i < GetSize(initval); i++)
-					if (initval[i] == State::S0 && !opts.zinit)
-						sigbit_init[initsig[i]] = false;
-					else if (initval[i] == State::S1)
-						sigbit_init[initsig[i]] = true;
-			}
-		}
-
-		for (auto cell : module->cells())
-		{
-			if (opts.ffcells.count(cell->type) && !cell->get_bool_attribute("\\keep"))
-			{
-				IdString d_port = opts.ffcells.at(cell->type).first;
-				IdString q_port = opts.ffcells.at(cell->type).second;
-
-				SigBit d_bit = sigmap(cell->getPort(d_port).as_bit());
-				SigBit q_bit = sigmap(cell->getPort(q_port).as_bit());
-
-				if (opts.init || sigbit_init.count(q_bit) == 0)
-				{
-					if (sigbit_chain_next.count(d_bit)) {
-						sigbit_with_non_chain_users.insert(d_bit);
-					} else
-						sigbit_chain_next[d_bit] = cell;
-
-					sigbit_chain_prev[q_bit] = cell;
-					continue;
-				}
-			}
-
-			for (auto conn : cell->connections())
-				if (cell->input(conn.first))
-					for (auto bit : sigmap(conn.second))
-						sigbit_with_non_chain_users.insert(bit);
-		}
+	    if (wire->attributes.count("\\init")) {
+		SigSpec initsig = sigmap(wire);
+		Const initval = wire->attributes.at("\\init");
+		for (int i = 0; i < GetSize(initsig) && i < GetSize(initval); i++)
+		    if (initval[i] == State::S0 && !opts.zinit)
+			sigbit_init[initsig[i]] = false;
+		    else if (initval[i] == State::S1)
+			sigbit_init[initsig[i]] = true;
+	    }
 	}
 
-	void find_chain_start_cells()
+	for (auto cell : module->cells())
 	{
-		for (auto it : sigbit_chain_next)
+	    if (opts.ffcells.count(cell->type) && !cell->get_bool_attribute("\\keep"))
+	    {
+		IdString d_port = opts.ffcells.at(cell->type).first;
+		IdString q_port = opts.ffcells.at(cell->type).second;
+
+		SigBit d_bit = sigmap(cell->getPort(d_port).as_bit());
+		SigBit q_bit = sigmap(cell->getPort(q_port).as_bit());
+
+		if (opts.init || sigbit_init.count(q_bit) == 0)
 		{
-			if (opts.tech == nullptr && sigbit_with_non_chain_users.count(it.first))
-				goto start_cell;
+		    if (sigbit_chain_next.count(d_bit)) {
+			sigbit_with_non_chain_users.insert(d_bit);
+		    } else
+			sigbit_chain_next[d_bit] = cell;
 
-			if (sigbit_chain_prev.count(it.first) != 0)
-			{
-				Cell *c1 = sigbit_chain_prev.at(it.first);
-				Cell *c2 = it.second;
-
-				if (c1->type != c2->type)
-					goto start_cell;
-
-				if (c1->parameters != c2->parameters)
-					goto start_cell;
-
-				IdString d_port = opts.ffcells.at(c1->type).first;
-				IdString q_port = opts.ffcells.at(c1->type).second;
-
-				auto c1_conn = c1->connections();
-				auto c2_conn = c1->connections();
-
-				c1_conn.erase(d_port);
-				c1_conn.erase(q_port);
-
-				c2_conn.erase(d_port);
-				c2_conn.erase(q_port);
-
-				if (c1_conn != c2_conn)
-					goto start_cell;
-
-				continue;
-			}
-
-		start_cell:
-			chain_start_cells.insert(it.second);
+		    sigbit_chain_prev[q_bit] = cell;
+		    continue;
 		}
+	    }
+
+	    for (auto conn : cell->connections())
+		if (cell->input(conn.first))
+		    for (auto bit : sigmap(conn.second)) {
+			sigbit_with_non_chain_users.insert(bit);
+			if (opts.tech) opts.tech->non_chain_user(bit, cell, conn.first);
+		    }
+	}
+    }
+
+    void find_chain_start_cells()
+    {
+	// A cell starts a chain unless its D input is driven by a compatible chain FF
+	for (auto it : sigbit_chain_next)
+	{
+	    if (opts.tech == nullptr && sigbit_with_non_chain_users.count(it.first))
+		goto start_cell;
+
+	    if (sigbit_chain_prev.count(it.first) != 0)
+	    {
+		Cell *c1 = sigbit_chain_prev.at(it.first);
+		Cell *c2 = it.second;
+
+		if (c1->type != c2->type)
+		    goto start_cell;
+
+		if (c1->parameters != c2->parameters)
+		    goto start_cell;
+
+		IdString d_port = opts.ffcells.at(c1->type).first;
+		IdString q_port = opts.ffcells.at(c1->type).second;
+		// Every port other than D and Q must be wired identically on both cells
+		auto c1_conn = c1->connections();
+		auto c2_conn = c2->connections();
+
+		c1_conn.erase(d_port);
+		c1_conn.erase(q_port);
+		c2_conn.erase(d_port);
+		c2_conn.erase(q_port);
+
+		if (c1_conn != c2_conn)
+		    goto start_cell;
+
+		continue;
+	    }
+
+start_cell:
+	    chain_start_cells.insert(it.second);
+	}
+    }
+
+    vector<Cell*> create_chain(Cell *start_cell)
+    {
+	vector<Cell*> chain;
+
+	Cell *c = start_cell;
+	while (c != nullptr)
+	{
+	    chain.push_back(c);
+
+	    IdString q_port = opts.ffcells.at(c->type).second;
+	    SigBit q_bit = sigmap(c->getPort(q_port).as_bit());
+
+	    if (sigbit_chain_next.count(q_bit) == 0)
+		break;
+
+	    c = sigbit_chain_next.at(q_bit);
+	    if (chain_start_cells.count(c) != 0)
+		break;
 	}
 
-	vector<Cell*> create_chain(Cell *start_cell)
-	{
-		vector<Cell*> chain;
+	return chain;
+    }
 
-		Cell *c = start_cell;
-		while (c != nullptr)
+    void process_chain(vector<Cell*> &chain)
+    {
+	if (GetSize(chain) < opts.keep_before + opts.minlen + opts.keep_after)
+	    return;
+
+	int cursor = opts.keep_before;
+	while (cursor < GetSize(chain) - opts.keep_after)
+	{
+	    int depth = GetSize(chain) - opts.keep_after - cursor;
+
+	    if (opts.maxlen > 0)
+		depth = std::min(opts.maxlen, depth);
+
+	    Cell *first_cell = chain[cursor];
+	    IdString q_port = opts.ffcells.at(first_cell->type).second;
+	    dict<int, SigBit> taps_dict;
+
+	    if (opts.tech)
+	    {
+		vector<SigBit> qbits;
+		vector<int> taps;
+
+		for (int i = 0; i < depth; i++)
 		{
-			chain.push_back(c);
+		    Cell *cell = chain[cursor+i];
+		    auto qbit = sigmap(cell->getPort(q_port));
+		    qbits.push_back(qbit);
 
-			IdString q_port = opts.ffcells.at(c->type).second;
-			SigBit q_bit = sigmap(c->getPort(q_port).as_bit());
-
-			if (sigbit_chain_next.count(q_bit) == 0)
-				break;
-
-			c = sigbit_chain_next.at(q_bit);
-			if (chain_start_cells.count(c) != 0)
-				break;
+		    if (sigbit_with_non_chain_users.count(qbit))
+			taps.push_back(i);
 		}
 
-		return chain;
-	}
-
-	void process_chain(vector<Cell*> &chain)
-	{
-		if (GetSize(chain) < opts.keep_before + opts.minlen + opts.keep_after)
-			return;
-
-		int cursor = opts.keep_before;
-		while (cursor < GetSize(chain) - opts.keep_after)
+		while (depth > 0)
 		{
-			int depth = GetSize(chain) - opts.keep_after - cursor;
+		    if (taps.empty() || taps.back() < depth-1)
+			taps.push_back(depth-1);
 
-			if (opts.maxlen > 0)
-				depth = std::min(opts.maxlen, depth);
+		    if (opts.tech->analyze(taps, qbits))
+			break;
 
-			Cell *first_cell = chain[cursor];
-			IdString q_port = opts.ffcells.at(first_cell->type).second;
-			dict<int, SigBit> taps_dict;
-
-			if (opts.tech)
-			{
-				vector<SigBit> qbits;
-				vector<int> taps;
-
-				for (int i = 0; i < depth; i++)
-				{
-					Cell *cell = chain[cursor+i];
-					auto qbit = sigmap(cell->getPort(q_port));
-					qbits.push_back(qbit);
-
-					if (sigbit_with_non_chain_users.count(qbit))
-						taps.push_back(i);
-				}
-
-				while (depth > 0)
-				{
-					if (taps.empty() || taps.back() < depth-1)
-						taps.push_back(depth-1);
-
-					if (opts.tech->analyze(taps))
-						break;
-
-					taps.pop_back();
-					depth--;
-				}
-
-				depth = 0;
-				for (auto tap : taps) {
-					taps_dict[tap] = qbits.at(tap);
-					log_assert(depth < tap+1);
-					depth = tap+1;
-				}
-			}
-
-			if (depth < 2) {
-				cursor++;
-				continue;
-			}
-
-			Cell *last_cell = chain[cursor+depth-1];
-
-			log("Converting %s.%s ... %s.%s to a shift register with depth %d.\n",
-				log_id(module), log_id(first_cell), log_id(module), log_id(last_cell), depth);
-
-			dff_count += depth;
-			shreg_count += 1;
-
-			string shreg_cell_type_str = "$__SHREG";
-			if (opts.params) {
-				shreg_cell_type_str += "_";
-			} else {
-				if (first_cell->type[1] != '_')
-					shreg_cell_type_str += "_";
-				shreg_cell_type_str += first_cell->type.substr(1);
-			}
-
-			if (opts.init) {
-				vector<State> initval;
-				for (int i = depth-1; i >= 0; i--) {
-					SigBit bit = sigmap(chain[cursor+i]->getPort(q_port).as_bit());
-					if (sigbit_init.count(bit) == 0)
-						initval.push_back(State::Sx);
-					else if (sigbit_init.at(bit))
-						initval.push_back(State::S1);
-					else
-						initval.push_back(State::S0);
-					remove_init.insert(bit);
-				}
-				first_cell->setParam("\\INIT", initval);
-			}
-
-			if (opts.zinit)
-				for (int i = depth-1; i >= 0; i--) {
-					SigBit bit = sigmap(chain[cursor+i]->getPort(q_port).as_bit());
-					remove_init.insert(bit);
-				}
-
-			if (opts.params)
-			{
-				int param_clkpol = -1;
-				int param_enpol = 2;
-
-				if (first_cell->type == "$_DFF_N_") param_clkpol = 0;
-				if (first_cell->type == "$_DFF_P_") param_clkpol = 1;
-
-				if (first_cell->type == "$_DFFE_NN_") param_clkpol = 0, param_enpol = 0;
-				if (first_cell->type == "$_DFFE_NP_") param_clkpol = 0, param_enpol = 1;
-				if (first_cell->type == "$_DFFE_PN_") param_clkpol = 1, param_enpol = 0;
-				if (first_cell->type == "$_DFFE_PP_") param_clkpol = 1, param_enpol = 1;
-
-				log_assert(param_clkpol >= 0);
-				first_cell->setParam("\\CLKPOL", param_clkpol);
-				if (opts.ffe) first_cell->setParam("\\ENPOL", param_enpol);
-			}
-
-			first_cell->type = shreg_cell_type_str;
-			first_cell->setPort(q_port, last_cell->getPort(q_port));
-			first_cell->setParam("\\DEPTH", depth);
-
-			if (opts.tech != nullptr && !opts.tech->fixup(first_cell, taps_dict))
-				remove_cells.insert(first_cell);
-
-			for (int i = 1; i < depth; i++)
-				remove_cells.insert(chain[cursor+i]);
-			cursor += depth;
+		    taps.pop_back();
+		    depth--;
 		}
-	}
 
-	void cleanup()
+		depth = 0;
+		for (auto tap : taps) {
+		    taps_dict[tap] = qbits.at(tap);
+		    log_assert(depth < tap+1);
+		    depth = tap+1;
+		}
+	    }
+
+	    if (depth < 2) {
+		cursor++;
+		continue;
+	    }
+
+	    Cell *last_cell = chain[cursor+depth-1];
+
+	    log("Converting %s.%s ... %s.%s to a shift register with depth %d.\n",
+		    log_id(module), log_id(first_cell), log_id(module), log_id(last_cell), depth);
+
+	    dff_count += depth;
+	    shreg_count += 1;
+
+	    string shreg_cell_type_str = "$__SHREG";
+	    if (opts.params) {
+		shreg_cell_type_str += "_";
+	    } else {
+		if (first_cell->type[1] != '_')
+		    shreg_cell_type_str += "_";
+		shreg_cell_type_str += first_cell->type.substr(1);
+	    }
+
+	    if (opts.init) {
+		vector<State> initval;
+		for (int i = depth-1; i >= 0; i--) {
+		    SigBit bit = sigmap(chain[cursor+i]->getPort(q_port).as_bit());
+		    if (sigbit_init.count(bit) == 0)
+			initval.push_back(State::Sx);
+		    else if (sigbit_init.at(bit))
+			initval.push_back(State::S1);
+		    else
+			initval.push_back(State::S0);
+		    remove_init.insert(bit);
+		}
+		first_cell->setParam("\\INIT", initval);
+	    }
+
+	    if (opts.zinit)
+		for (int i = depth-1; i >= 0; i--) {
+		    SigBit bit = sigmap(chain[cursor+i]->getPort(q_port).as_bit());
+		    remove_init.insert(bit);
+		}
+
+	    if (opts.params)
+	    {
+		int param_clkpol = -1;
+		int param_enpol = 2;
+
+		if (first_cell->type == "$_DFF_N_") param_clkpol = 0;
+		if (first_cell->type == "$_DFF_P_") param_clkpol = 1;
+
+		if (first_cell->type == "$_DFFE_NN_") param_clkpol = 0, param_enpol = 0;
+		if (first_cell->type == "$_DFFE_NP_") param_clkpol = 0, param_enpol = 1;
+		if (first_cell->type == "$_DFFE_PN_") param_clkpol = 1, param_enpol = 0;
+		if (first_cell->type == "$_DFFE_PP_") param_clkpol = 1, param_enpol = 1;
+
+		log_assert(param_clkpol >= 0);
+		first_cell->setParam("\\CLKPOL", param_clkpol);
+		if (opts.ffe) first_cell->setParam("\\ENPOL", param_enpol);
+	    }
+
+	    first_cell->type = shreg_cell_type_str;
+	    first_cell->setPort(q_port, last_cell->getPort(q_port));
+	    if (!first_cell->hasPort("\\L"))
+		    first_cell->setPort("\\L", depth-1);
+	    first_cell->setParam("\\DEPTH", depth);
+
+	    if (opts.tech != nullptr && !opts.tech->fixup(first_cell, taps_dict))
+		remove_cells.insert(first_cell);
+
+	    for (int i = 1; i < depth; i++)
+		remove_cells.insert(chain[cursor+i]);
+	    cursor += depth;
+	}
+    }
+
+    void cleanup()
+    {
+	for (auto cell : remove_cells)
+	    module->remove(cell);
+
+	for (auto wire : module->wires())
 	{
-		for (auto cell : remove_cells)
-			module->remove(cell);
+	    if (wire->attributes.count("\\init") == 0)
+		continue;
 
-		for (auto wire : module->wires())
-		{
-			if (wire->attributes.count("\\init") == 0)
-				continue;
+	    SigSpec initsig = sigmap(wire);
+	    Const &initval = wire->attributes.at("\\init");
 
-			SigSpec initsig = sigmap(wire);
-			Const &initval = wire->attributes.at("\\init");
+	    for (int i = 0; i < GetSize(initsig) && i < GetSize(initval); i++)
+		if (remove_init.count(initsig[i]))
+		    initval[i] = State::Sx;
 
-			for (int i = 0; i < GetSize(initsig) && i < GetSize(initval); i++)
-				if (remove_init.count(initsig[i]))
-					initval[i] = State::Sx;
-
-			if (SigSpec(initval).is_fully_undef())
-				wire->attributes.erase("\\init");
-		}
-
-		remove_cells.clear();
-		sigbit_chain_next.clear();
-		sigbit_chain_prev.clear();
-		chain_start_cells.clear();
+	    if (SigSpec(initval).is_fully_undef())
+		wire->attributes.erase("\\init");
 	}
 
-	ShregmapWorker(Module *module, const ShregmapOptions &opts) :
-			module(module), sigmap(module), opts(opts), dff_count(0), shreg_count(0)
-	{
-		make_sigbit_chain_next_prev();
-		find_chain_start_cells();
+	remove_cells.clear();
+	sigbit_chain_next.clear();
+	sigbit_chain_prev.clear();
+	chain_start_cells.clear();
+    }
 
-		for (auto c : chain_start_cells) {
-			vector<Cell*> chain = create_chain(c);
-			process_chain(chain);
-		}
+    ShregmapWorker(Module *module, const ShregmapOptions &opts) :
+	module(module), sigmap(module), opts(opts), dff_count(0), shreg_count(0)
+    {
+	if (opts.tech)
+	    opts.tech->init(module, sigmap);
 
-		cleanup();
+	make_sigbit_chain_next_prev();
+	find_chain_start_cells();
+
+	for (auto c : chain_start_cells) {
+	    vector<Cell*> chain = create_chain(c);
+	    process_chain(chain);
 	}
+
+	cleanup();
+    }
 };
 
 struct ShregmapPass : public Pass {
@@ -501,6 +624,12 @@ struct ShregmapPass : public Pass {
 					clkpol = "pos";
 					opts.zinit = true;
 					opts.tech = new ShregmapTechGreenpak4;
+				}
+				else if (tech == "xilinx") {
+					opts.init = true;
+					opts.params = true;
+					enpol = "any_or_none";
+					opts.tech = new ShregmapTechXilinx7(opts);
 				} else {
 					argidx--;
 					break;
diff --git a/techlibs/xilinx/cells_map.v b/techlibs/xilinx/cells_map.v
index 69f8b85f4..e7fb269e9 100644
--- a/techlibs/xilinx/cells_map.v
+++ b/techlibs/xilinx/cells_map.v
@@ -17,7 +17,7 @@
  *
  */
 
-module \$__SHREG_ (input C, input D, input E, output Q);
+module \$__SHREG_ (input C, input D, input [31:0] L, input E, output Q);
   parameter DEPTH = 0;
   parameter [DEPTH-1:0] INIT = 0;
   parameter CLKPOL = 1;
@@ -36,6 +36,9 @@ module \$__SHREG_ (input C, input D, input E, output Q);
   endfunction
   localparam [DEPTH-1:0] INIT_R = brev(INIT);
 
+  parameter _TECHMAP_CONSTMSK_L_ = 0;
+  parameter _TECHMAP_CONSTVAL_L_ = 0;
+
   generate
     if (ENPOL == 0)
       assign CE = ~E;
@@ -44,60 +47,86 @@ module \$__SHREG_ (input C, input D, input E, output Q);
     else
       assign CE = 1'b1;
     if (DEPTH == 1) begin
-        if (CLKPOL)
-            FDRE #(.INIT(INIT_R)) _TECHMAP_REPLACE_ (.D(D), .Q(Q), .C(C), .CE(CE), .R(1'b0));
-        else
-            FDRE_1 #(.INIT(INIT_R)) _TECHMAP_REPLACE_ (.D(D), .Q(Q), .C(C), .CE(CE), .R(1'b0));
+      wire _TECHMAP_FAIL_ = ~&_TECHMAP_CONSTMSK_L_ || _TECHMAP_CONSTVAL_L_ != 0;
+      if (CLKPOL)
+          FDRE #(.INIT(INIT_R)) _TECHMAP_REPLACE_ (.D(D), .Q(Q), .C(C), .CE(CE), .R(1'b0));
+      else
+          FDRE_1 #(.INIT(INIT_R)) _TECHMAP_REPLACE_ (.D(D), .Q(Q), .C(C), .CE(CE), .R(1'b0));
     end else
     if (DEPTH <= 16) begin
-      localparam [3:0] A = DEPTH - 1;
-      SRL16E #(.INIT(INIT_R), .IS_CLK_INVERTED(~CLKPOL[0])) _TECHMAP_REPLACE_ (.A0(A[0]), .A1(A[1]), .A2(A[2]), .A3(A[3]), .CE(CE), .CLK(C), .D(D), .Q(Q));
+      SRL16E #(.INIT(INIT_R), .IS_CLK_INVERTED(~CLKPOL[0])) _TECHMAP_REPLACE_ (.A0(L[0]), .A1(L[1]), .A2(L[2]), .A3(L[3]), .CE(CE), .CLK(C), .D(D), .Q(Q));
     end else
     if (DEPTH > 17 && DEPTH <= 32) begin
-      SRLC32E #(.INIT(INIT_R), .IS_CLK_INVERTED(~CLKPOL[0])) _TECHMAP_REPLACE_ (.A(DEPTH-1), .CE(CE), .CLK(C), .D(D), .Q(Q));
+      SRLC32E #(.INIT(INIT_R), .IS_CLK_INVERTED(~CLKPOL[0])) _TECHMAP_REPLACE_ (.A(L[4:0]), .CE(CE), .CLK(C), .D(D), .Q(Q));
     end else
     if (DEPTH > 33 && DEPTH <= 64) begin
       wire T0, T1, T2;
-      localparam [5:0] A = DEPTH-1;
-      SRLC32E #(.INIT(INIT_R[32-1:0]), .IS_CLK_INVERTED(~CLKPOL[0])) fpga_srl_0 (.A(A[4:0]), .CE(CE), .CLK(C), .D(D), .Q(T0), .Q31(T1));
-      \$__SHREG_ #(.DEPTH(DEPTH-32), .INIT(INIT[DEPTH-32-1:0]), .CLKPOL(CLKPOL), .ENPOL(ENPOL)) fpga_srl_1 (.C(C), .D(T1), .E(E), .Q(T2));
-      MUXF7 fpga_mux_0 (.O(Q), .I0(T0), .I1(T2), .S(A[5]));
+      SRLC32E #(.INIT(INIT_R[32-1:0]), .IS_CLK_INVERTED(~CLKPOL[0])) fpga_srl_0 (.A(L[4:0]), .CE(CE), .CLK(C), .D(D), .Q(T0), .Q31(T1));
+      \$__SHREG_ #(.DEPTH(DEPTH-32), .INIT(INIT[DEPTH-32-1:0]), .CLKPOL(CLKPOL), .ENPOL(ENPOL)) fpga_srl_1 (.C(C), .D(T1), .L(L), .E(E), .Q(T2));
+      if (&_TECHMAP_CONSTMSK_L_)
+        assign Q = T2;
+      else
+        MUXF7 fpga_mux_0 (.O(Q), .I0(T0), .I1(T2), .S(L[5]));
     end else
     if (DEPTH > 65 && DEPTH <= 96) begin
-      localparam [6:0] A = DEPTH-1;
       wire T0, T1, T2, T3, T4, T5, T6;
-      SRLC32E #(.INIT(INIT_R[32-1:0]), .IS_CLK_INVERTED(~CLKPOL[0])) fpga_srl_0 (.A(A[4:0]), .CE(CE), .CLK(C), .D(D), .Q(T0), .Q31(T1));
-      SRLC32E #(.INIT(INIT_R[64-1:32]), .IS_CLK_INVERTED(~CLKPOL[0])) fpga_srl_1 (.A(A[4:0]), .CE(CE), .CLK(C), .D(T1), .Q(T2), .Q31(T3));
-      \$__SHREG_ #(.DEPTH(DEPTH-64), .INIT(INIT[DEPTH-64-1:0]), .CLKPOL(CLKPOL), .ENPOL(ENPOL)) fpga_srl_2 (.C(C), .D(T3), .E(E), .Q(T4));
-      MUXF7 fpga_mux_0 (.O(T5), .I0(T0), .I1(T2), .S(A[5]));
-      MUXF7 fpga_mux_1 (.O(T6), .I0(T4), .I1(1'b0 /* unused */), .S(A[5]));
-      MUXF8 fpga_mux_2 (.O(Q), .I0(T5), .I1(T6), .S(A[6]));
+      SRLC32E #(.INIT(INIT_R[32-1:0]), .IS_CLK_INVERTED(~CLKPOL[0])) fpga_srl_0 (.A(L[4:0]), .CE(CE), .CLK(C), .D(D), .Q(T0), .Q31(T1));
+      SRLC32E #(.INIT(INIT_R[64-1:32]), .IS_CLK_INVERTED(~CLKPOL[0])) fpga_srl_1 (.A(L[4:0]), .CE(CE), .CLK(C), .D(T1), .Q(T2), .Q31(T3));
+      \$__SHREG_ #(.DEPTH(DEPTH-64), .INIT(INIT[DEPTH-64-1:0]), .CLKPOL(CLKPOL), .ENPOL(ENPOL)) fpga_srl_2 (.C(C), .D(T3), .L(L[4:0]), .E(E), .Q(T4));
+      if (&_TECHMAP_CONSTMSK_L_)
+        assign Q = T4;
+      else begin
+         MUXF7 fpga_mux_0 (.O(T5), .I0(T0), .I1(T2), .S(L[5]));
+        MUXF7 fpga_mux_1 (.O(T6), .I0(T4), .I1(1'b0 /* unused */), .S(L[5]));
+        MUXF8 fpga_mux_2 (.O(Q), .I0(T5), .I1(T6), .S(L[6]));
+      end
     end else
     if (DEPTH > 97 && DEPTH <= 128) begin
-      localparam [6:0] A = DEPTH-1;
       wire T0, T1, T2, T3, T4, T5, T6, T7, T8;
-      SRLC32E #(.INIT(INIT_R[32-1:0]), .IS_CLK_INVERTED(~CLKPOL[0])) fpga_srl_0 (.A(A[4:0]), .CE(CE), .CLK(C), .D(D), .Q(T0), .Q31(T1));
-      SRLC32E #(.INIT(INIT_R[64-1:32]), .IS_CLK_INVERTED(~CLKPOL[0])) fpga_srl_1 (.A(A[4:0]), .CE(CE), .CLK(C), .D(T1), .Q(T2), .Q31(T3));
-      SRLC32E #(.INIT(INIT_R[96-1:64]), .IS_CLK_INVERTED(~CLKPOL[0])) fpga_srl_2 (.A(A[4:0]), .CE(CE), .CLK(C), .D(T3), .Q(T4), .Q31(T5));
-      \$__SHREG_ #(.DEPTH(DEPTH-96), .INIT(INIT[DEPTH-96-1:0]), .CLKPOL(CLKPOL), .ENPOL(ENPOL)) fpga_srl_3 (.C(C), .D(T5), .E(E), .Q(T6));
-      MUXF7 fpga_mux_0 (.O(T7), .I0(T0), .I1(T2), .S(A[5]));
-      MUXF7 fpga_mux_1 (.O(T8), .I0(T4), .I1(T6), .S(A[5]));
-      MUXF8 fpga_mux_2 (.O(Q), .I0(T7), .I1(T8), .S(A[6]));
+      SRLC32E #(.INIT(INIT_R[32-1:0]), .IS_CLK_INVERTED(~CLKPOL[0])) fpga_srl_0 (.A(L[4:0]), .CE(CE), .CLK(C), .D(D), .Q(T0), .Q31(T1));
+      SRLC32E #(.INIT(INIT_R[64-1:32]), .IS_CLK_INVERTED(~CLKPOL[0])) fpga_srl_1 (.A(L[4:0]), .CE(CE), .CLK(C), .D(T1), .Q(T2), .Q31(T3));
+      SRLC32E #(.INIT(INIT_R[96-1:64]), .IS_CLK_INVERTED(~CLKPOL[0])) fpga_srl_2 (.A(L[4:0]), .CE(CE), .CLK(C), .D(T3), .Q(T4), .Q31(T5));
+      \$__SHREG_ #(.DEPTH(DEPTH-96), .INIT(INIT[DEPTH-96-1:0]), .CLKPOL(CLKPOL), .ENPOL(ENPOL)) fpga_srl_3 (.C(C), .D(T5), .L(L[4:0]), .E(E), .Q(T6));
+      if (&_TECHMAP_CONSTMSK_L_)
+        assign Q = T6;
+      else begin
+        MUXF7 fpga_mux_0 (.O(T7), .I0(T0), .I1(T2), .S(L[5]));
+        MUXF7 fpga_mux_1 (.O(T8), .I0(T4), .I1(T6), .S(L[5]));
+        MUXF8 fpga_mux_2 (.O(Q), .I0(T7), .I1(T8), .S(L[6]));
+      end
     end
-    else if (DEPTH <= 129) begin
+    else if (DEPTH < 129 || (DEPTH <= 129 && &_TECHMAP_CONSTMSK_L_)) begin
       // Handle cases where depth is just 1 over a convenient value,
-      // in which case use the flop
-      wire T0;
-      \$__SHREG_ #(.DEPTH(DEPTH-1), .INIT(INIT[DEPTH-1:1]), .CLKPOL(CLKPOL), .ENPOL(ENPOL)) fpga_srl_0 (.C(C), .D(D), .E(E), .Q(T0));
-      \$__SHREG_ #(.DEPTH(1), .INIT(INIT[0]), .CLKPOL(CLKPOL), .ENPOL(ENPOL)) fpga_srl_1 (.C(C), .D(T0), .E(E), .Q(Q));
-    end else
-    begin
-      // UG474 (v1.8, p34) states that:
-      //   "There are no direct connections between slices to form longer shift
-      //    registers, nor is the MC31 output at LUT B/C/D available."
-      wire T0;
-      \$__SHREG_ #(.DEPTH(128), .INIT(INIT[DEPTH-1:DEPTH-128]), .CLKPOL(CLKPOL), .ENPOL(ENPOL)) fpga_srl_0 (.C(C), .D(D), .E(E), .Q(T0));
-      \$__SHREG_ #(.DEPTH(DEPTH-128), .INIT(INIT[DEPTH-128-1:0]), .CLKPOL(CLKPOL), .ENPOL(ENPOL)) fpga_srl_1 (.C(C), .D(T0), .E(E), .Q(Q));
+      if (&_TECHMAP_CONSTMSK_L_) begin
+        // For constant length, use the flop
+        wire T0;
+        \$__SHREG_ #(.DEPTH(DEPTH-1), .INIT(INIT[DEPTH-1:1]), .CLKPOL(CLKPOL), .ENPOL(ENPOL)) fpga_srl_0 (.C(C), .D(D), .L(DEPTH-1-1), .E(E), .Q(T0));
+        \$__SHREG_ #(.DEPTH(1), .INIT(INIT[0]), .CLKPOL(CLKPOL), .ENPOL(ENPOL)) fpga_srl_1 (.C(C), .D(T0), .L(0), .E(E), .Q(Q));
+      end
+      else begin
+        // For variable length, bump up to the next length
+        // because we can't access Q31
+        \$__SHREG_ #(.DEPTH(DEPTH+1), .INIT(INIT), .CLKPOL(CLKPOL), .ENPOL(ENPOL)) _TECHMAP_REPLACE_ (.C(C), .D(D), .L(L), .E(E), .Q(Q));
+      end
+    end 
+    else begin
+      if (&_TECHMAP_CONSTMSK_L_) begin
+        // UG474 (v1.8, p34) states that:
+        //   "There are no direct connections between slices to form longer shift
+        //    registers, nor is the MC31 output at LUT B/C/D available."
+        wire T0;
+        \$__SHREG_ #(.DEPTH(128), .INIT(INIT[DEPTH-1:DEPTH-128]), .CLKPOL(CLKPOL), .ENPOL(ENPOL)) fpga_srl_0 (.C(C), .D(D), .L(127), .E(E), .Q(T0));
+        \$__SHREG_ #(.DEPTH(DEPTH-128), .INIT(INIT[DEPTH-128-1:0]), .CLKPOL(CLKPOL), .ENPOL(ENPOL)) fpga_srl_1 (.C(C), .D(T0), .L(DEPTH-1-128), .E(E), .Q(Q));
+      end
+      else begin
+        // No way to create variable length shift registers >128 bits as Q31
+        // cannot be output to the fabric...
+        wire [DEPTH-1:-1] c;
+        genvar i;
+        for (i = 0; i < DEPTH; i=i+1)
+            \$__SHREG_ #(.DEPTH(1), .INIT(INIT_R[i]), .CLKPOL(CLKPOL), .ENPOL(ENPOL)) fpga_srl (.C(C), .D(c[i-1]), .L(0), .E(E), .Q(c[i]));
+        assign { c[-1], Q } = { D, c[L] };
+      end
     end
   endgenerate
 endmodule
diff --git a/techlibs/xilinx/synth_xilinx.cc b/techlibs/xilinx/synth_xilinx.cc
index f2c3833a4..443ac4eed 100644
--- a/techlibs/xilinx/synth_xilinx.cc
+++ b/techlibs/xilinx/synth_xilinx.cc
@@ -110,9 +110,8 @@ struct SynthXilinxPass : public Pass
 		log("        dffsr2dff\n");
 		log("        dff2dffe\n");
 		log("        opt -full\n");
-		log("        techmap -map +/techmap.v -map +/xilinx/arith_map.v\n");
-		log("        shregmap -init -params -enpol any_or_none\n");
-		log("        techmap -map +/xilinx/ff_map.v\n");
+		log("        shregmap -tech xilinx\n");
+		log("        techmap -map +/techmap.v -map +/xilinx/arith_map.v +/xilinx/ff_map.v\n");
 		log("        opt -fast\n");
 		log("\n");
 		log("    map_luts:\n");
@@ -256,14 +255,17 @@ struct SynthXilinxPass : public Pass
 			Pass::call(design, "dff2dffe");
 			Pass::call(design, "opt -full");
 
+			Pass::call(design, "simplemap t:$dff*");
+			Pass::call(design, "shregmap -tech xilinx");
+			Pass::call(design, "techmap -map +/xilinx/cells_map.v t:$__SHREG_");
+			Pass::call(design, "opt -fast");
+
 			if (vpr) {
-				Pass::call(design, "techmap -map +/techmap.v -map +/xilinx/arith_map.v -D _EXPLICIT_CARRY");
+				Pass::call(design, "techmap -map +/techmap.v -map +/xilinx/arith_map.v -map +/xilinx/ff_map.v -D _EXPLICIT_CARRY");
 			} else {
-				Pass::call(design, "techmap -map +/techmap.v -map +/xilinx/arith_map.v");
+				Pass::call(design, "techmap -map +/techmap.v -map +/xilinx/arith_map.v -map +/xilinx/ff_map.v");
 			}
 
-			Pass::call(design, "shregmap -init -params -enpol any_or_none");
-			Pass::call(design, "techmap -map +/xilinx/ff_map.v");
 			Pass::call(design, "opt -fast");
 		}