/*
 * qpi_memctrl.v
 *
 * vim: ts=4 sw=4
 *
 * Copyright (C) 2019-2021  Sylvain Munaut <tnt@246tNt.com>
 * SPDX-License-Identifier: CERN-OHL-P-2.0
 */

`default_nettype none

// Two-argument minimum / maximum helpers.
// Each argument is textually substituted and appears twice in the expansion,
// so only pass side-effect-free (constant) expressions.
`define MIN(a,b) (((a) < (b)) ? (a) : (b))
`define MAX(a,b) (((a) < (b)) ? (b) : (a))

module qpi_memctrl #(
	// Command opcodes used for memory-interface accesses. The byte at index
	// `mi_addr_cs` (8*mi_addr_cs +: 8) is selected for the addressed chip.
	parameter integer CMD_READ  = 16'hEBEB,
	parameter integer CMD_WRITE = 16'h0202,
	// The read dummy phase loads the shift counter with DUMMY_CLK - PHY_SPEED - 1
	parameter integer DUMMY_CLK = 6,
	// The pause counter is reloaded with PAUSE_CLK - 2 outside of ST_PAUSE
	parameter integer PAUSE_CLK = 3,
	// Depth passed to both the command and response FIFO.
	// > 4 instantiates fifo_sync_ram, otherwise fifo_sync_shift.
	parameter integer FIFO_DEPTH  = 1,
	parameter integer N_CS = 2,				/* CS count */
	parameter integer DATA_WIDTH = 32,		/* Access port width */
	parameter integer PHY_SPEED = 1,		/* Speed Factor: 1x 2x 4x */
	parameter integer PHY_WIDTH = 1,		/* Width Factor: 1x 2x    */
	parameter integer PHY_DELAY = 6,		/* See PHY doc */

	// auto
	// (values derived from the parameters above)
	parameter integer DL  = DATA_WIDTH-1,

	parameter integer STW = `MAX(DATA_WIDTH, 32),		/* Shifter Total Width */
	parameter integer SDW = DATA_WIDTH,					/* Shifter width used for data burst */
	parameter integer SCW = STW / PHY_WIDTH,			/* Shifter Channel Width */
	parameter integer SL  = STW -1,

	parameter integer PTW = (PHY_WIDTH * 4 * PHY_SPEED),	/* PHY Total   Width */
	parameter integer PCW = (            4 * PHY_SPEED),	/* PHY Channel Width */
	parameter integer PSW = (                PHY_SPEED)		/* PHY Signal  Width */
)(
	// PHY interface
	// (bit layout of phy_io_* is described in the "Mapping Helpers" comment)
	input  wire [PTW-1:0]  phy_io_i,
	output reg  [PTW-1:0]  phy_io_o,
	output reg  [    3:0]  phy_io_oe,
	output reg  [PSW-1:0]  phy_clk_o,
	output reg  [N_CS-1:0] phy_cs_o,	// Driven to all-ones on reset and when idle

	// Memory interface
	// A request is taken when mi_valid is high while mi_ready (state == ST_IDLE)
	input  wire [ 1:0] mi_addr_cs,
	input  wire [23:0] mi_addr,
	input  wire [ 6:0] mi_len,		// Loaded (minus 1) into the xfer counter while idle
	input  wire        mi_rw,		/* 0=Write, 1=Read */
	input  wire        mi_valid,
	output wire        mi_ready,

	input  wire [DL:0] mi_wdata,
	output wire        mi_wack,		// High in ST_MI_WR_DATA when the shifter loads a new word
	output wire        mi_wlast,

	output wire [DL:0] mi_rdata,
	output wire        mi_rstb,
	output wire        mi_rlast,

	// Wishbone interface
	// wb_addr[4] set   : writes go to the command FIFO
	// wb_addr[4] clear : writes go to the CSR
	// wb_addr[1] set   : reads return response FIFO data, else the CSR
	input  wire [ 4:0] wb_addr,
	input  wire [31:0] wb_wdata,
	output reg  [31:0] wb_rdata,
	input  wire        wb_we,
	input  wire        wb_cyc,
	output reg         wb_ack,

	// Common
	input wire clk,
	input wire rst
);


	// Mapping Helpers
	// ---------------

	/*
	 * PHY signal mapping:
	 * 		phy_io = [ chan_1 | chan_0 ]
	 * 		chan_i = [ io_3 | io_2 | io_1 | io_0 ]
	 * 		io_i   = [ t_0 ... t_n ]  (t_0 being the 'first')
	 *
	 * Shifter-Out format:
	 * Shifter-In format:
	 * 		shift_data = [ chan_1 | chan_0 ]
	 * 		chan_i_qpi = [ io_3(0) io_2(0) io_1(0) io_0(0) io_3(1) ... ]
	 * 		chan_i_spi = [ t_0 t_1 ... t_n ]  (t_0 being 'first')
	 *
	 * Mem IF data:
	 * 		mi_{r,w}data = [ b3 | b2 | b1 | b0 ]
	 *
	 * 		Data is stored in memory in big-endian order (b3 b2 b1 b0).
	 * 		With multiple channels, (b1 b0) goes to chan 0 and (b3 b2) to chan 1.
	 *
	 * Wishbone data:
	 *      spi_xfer: bits are taken in order and shifted out MSB first
	 *                In case of multiple channel the register is split in 2x16 bits
	 *      qpi_cmd:  bits are taken in order and shifted out MSB first
	 *      qpi_read / qpi_data: See Mem IF data mapping
	 */


	// Shift-Out -> PHY mapping for SPI mode.
	//   shift : current shift-out register
	//   base  : value used for every PHY bit that is not an IO0 bit
	// Returns `base` with the IO0 slice of each channel replaced by the top
	// PSW bits of that channel's part of the shifter.
	function [PTW-1:0] shift2phy_spi;
		input [STW-1:0] shift;
		input [PTW-1:0] base;
		integer ch;
		begin
			// Bits that don't matter for SPI keep the caller supplied value
			shift2phy_spi = base;

			// IO0 (MOSI) of each channel gets the channel's PSW MSBs
			for (ch=0; ch<PHY_WIDTH; ch=ch+1)
				shift2phy_spi[ch*PCW +: PSW] = shift[(ch+1)*SCW-PSW +: PSW];
		end
	endfunction

	// Shift-Out register update for SPI mode: every channel moves up by PSW
	// bits, the vacated low bits are don't-care.
	function [STW-1:0] shift_spi;
		input [STW-1:0] shift;
		integer ch;
		begin
			for (ch=0; ch<PHY_WIDTH; ch=ch+1)
			begin
				// Surviving bits move towards the MSB of the channel
				shift_spi[ch*SCW+PSW +: SCW-PSW] = shift[ch*SCW +: SCW-PSW];

				// Freed bits carry no information
				shift_spi[ch*SCW +: PSW] = {PSW{1'bx}};
			end
		end
	endfunction

	// Shift-Out -> PHY mapping for QPI command mode.
	// All channels are fed from the same top 4*PHY_SPEED bits of the shifter,
	// so every channel outputs the same data.
	function [PTW-1:0] shift2phy_qpi_cmd;
		input [STW-1:0] shift;
		integer ch, lane, slot;
		begin
			for (ch=0; ch<PHY_WIDTH; ch=ch+1)
				for (lane=0; lane<4; lane=lane+1)
					for (slot=0; slot<PHY_SPEED; slot=slot+1)
						shift2phy_qpi_cmd[ch*PCW + lane*PSW + slot] =
							shift[(STW - (4*PHY_SPEED)) + 4*slot + lane];
		end
	endfunction

	// Shift-Out register update for QPI command mode: the whole word moves up
	// by PCW bits, the vacated low bits are don't-care.
	function [STW-1:0] shift_qpi_cmd;
		input [STW-1:0] shift;
		begin
			// Start from "nothing known" ...
			shift_qpi_cmd = {STW{1'bx}};

			// ... and keep whatever is left after dropping the top PCW bits
			if (STW > PCW)
				shift_qpi_cmd[STW-1:PCW] = shift[STW-PCW-1:0];
		end
	endfunction

	// Shift-Out -> PHY mapping for QPI data mode.
	// Each channel is fed from the top 4*PHY_SPEED bits of its own slice of
	// the shifter.
	function [PTW-1:0] shift2phy_qpi_data;
		input [STW-1:0] shift;
		integer ch, lane, slot;
		begin
			for (ch=0; ch<PHY_WIDTH; ch=ch+1)
				for (lane=0; lane<4; lane=lane+1)
					for (slot=0; slot<PHY_SPEED; slot=slot+1)
						shift2phy_qpi_data[ch*PCW + lane*PSW + slot] =
							shift[((ch+1)*SCW - (4*PHY_SPEED)) + 4*slot + lane];
		end
	endfunction

	// Shift-Out register update for QPI data mode: every channel moves up by
	// PCW bits, the vacated low bits are don't-care. When a channel is exactly
	// PCW wide nothing survives the shift.
	function [STW-1:0] shift_qpi_data;
		input [STW-1:0] shift;
		integer ch;
		begin
			// Default: no valid content left
			shift_qpi_data = { STW{1'bx} };

			// Keep the remaining bits of each channel, if there are any
			if (SCW != PCW)
				for (ch=0; ch<PHY_WIDTH; ch=ch+1)
				begin
					shift_qpi_data[ch*SCW+PCW +: SCW-PCW] = shift[ch*SCW +: SCW-PCW];
					shift_qpi_data[ch*SCW +: PCW] = {PCW{1'bx}};
				end
		end
	endfunction

	// PHY -> Shift-In mapping for SPI mode.
	//   prev : current shift-in register
	//   phy  : data sampled from the PHY
	// For every channel the previous content moves up by PSW bits and the
	// PSW bits of IO1 (second PSW-wide slice of the channel) are inserted at
	// the bottom.
	function [STW-1:0] phy2shift_spi;
		input [STW-1:0] prev;
		input [PTW-1:0] phy;
		integer chan;
		begin
			for (chan=0; chan<PHY_WIDTH; chan=chan+1)
			begin
				// Shift previous data
				phy2shift_spi[chan*SCW+PSW+:SCW-PSW] = prev[chan*SCW+:SCW-PSW];

				// Map new data (IO1 slice of this channel)
				phy2shift_spi[chan*SCW+:PSW] = phy[chan*PCW+PSW+:PSW];
			end
		end
	endfunction

	// PHY -> Shift-In mapping for QPI mode.
	//   prev : current shift-in register
	//   phy  : data sampled from the PHY
	// For every channel the previous content moves up by PCW bits and the
	// 4 * PHY_SPEED freshly sampled bits are interleaved into the bottom.
	function [STW-1:0] phy2shift_qpi;
		input [STW-1:0] prev;
		input [PTW-1:0] phy;
		integer ch, lane, slot;
		begin
			for (ch=0; ch<PHY_WIDTH; ch=ch+1)
			begin
				// New samples: PHY is grouped per IO line, shifter per time slot
				for (lane=0; lane<4; lane=lane+1)
					for (slot=0; slot<PHY_SPEED; slot=slot+1)
						phy2shift_qpi[ch*SCW + 4*slot + lane] = phy[ch*PCW + lane*PSW + slot];

				// Older data moves up (only if the channel holds more than one beat)
				if (PCW != SCW)
					phy2shift_qpi[ch*SCW+PCW +: SCW-PCW] = prev[ch*SCW +: SCW-PCW];
			end
		end
	endfunction


	// Signals
	// -------

	// Wishbone interface
	wire        wbi_we_csr;		// Acked write with wb_addr[4] clear
	wire [31:0] wbi_rd_csr;		// CSR read value
	wire [31:0] wbi_rd_rf;		// Response FIFO read value
	wire        wbi_rd_rst;		// Forces wb_rdata to zero (no cycle, or during ack)

	// Command & Response FIFOs
	// Command FIFO entries are { cf_dih, cf_dil } = { wb_addr[3:0], wb_wdata }
	wire  [3:0] cf_dih;
	wire [31:0] cf_dil;
	reg         cf_wren;
	wire        cf_full;
	wire  [3:0] cf_doh;
	wire [31:0] cf_dol;
	wire        cf_rden;
	wire        cf_empty;

	wire [31:0] rf_di;
	wire        rf_wren_safe;	// rf_wren gated by ~rf_full
	wire        rf_wren;
	wire        rf_full;
	wire [31:0] rf_do;
	wire        rf_rden;
	wire        rf_empty;

	reg         rf_overflow;		// Sticky: set when rf_wren hits a full FIFO
	reg         rf_overflow_clr;
	reg         rf_rden_arm;

	// External control
	reg  [ 1:0] ectl_cs;
	reg         ectl_req;
	wire        ectl_grant;		// state == ST_CMD_EXEC
	wire        ectl_idle;		// state == ST_IDLE

	// Main state machine
	localparam
		ST_IDLE			= 0,
		ST_CMD_EXEC		= 1,
		ST_MI_WR_DATA	= 2,
		ST_MI_RD_DUMMY	= 3,
		ST_MI_RD_DATA	= 4,
		ST_FLUSH        = 5,
		ST_PAUSE		= 6;

	reg [2:0] state;
	reg [2:0] state_nxt;

	// Xfer counter
	// (MSB doubles as the 'last' flag)
	reg  [ 7:0] xfer_cnt;
	wire        xfer_last;

	// Pause counter
	// (MSB doubles as the 'last' flag)
	reg  [ 3:0] pause_cnt;
	wire        pause_last;

	// Memory interface
	wire [ 7:0] mi_spi_cmd;

	// Shift-Out
	localparam
		SO_MODE_SPI			= 2'b00,
		SO_MODE_QPI_RD		= 2'b01,
		SO_MODE_QPI_WR		= 2'b10,
		SO_MODE_QPI_CMD		= 2'b11;

	localparam
		SO_LD_SRC_WB		= 2'b00,
		SO_LD_SRC_MI_DATA	= 2'b10,
		SO_LD_SRC_MI_CMD	= 2'b11;

	localparam
		SO_DST_NONE			= 2'b00,
		SO_DST_WB			= 2'b10,
		SO_DST_MI			= 2'b11;

	// so_ld_* describe the next word to load, so_* the word being shifted
	wire        so_ld_now;
	reg         so_ld_valid;
	reg  [ 1:0] so_ld_mode;
	reg  [ 1:0] so_ld_dst;
	reg  [ 5:0] so_ld_cnt;
	reg  [ 1:0] so_ld_src;

	reg         so_valid;
	reg  [ 1:0] so_mode;
	reg  [ 1:0] so_dst;
	reg  [ 5:0] so_cnt;
	wire        so_last;		// MSB of so_cnt
	reg  [SL:0] so_data;

	// Shift-In
	// si_dst encoding differs from SO_DST_*:
	//   2'b00 = none, 2'b01 = response FIFO,
	//   2'b1x = memory interface (bit 0 is driven from state == ST_FLUSH
	//           and exported as mi_rlast)
	wire        si_mode_0;
	wire        si_mode_nm1;
	reg  [ 1:0] si_dst_1;
	wire [ 1:0] si_dst_n;

	reg  [SL:0] si_data_n;


	// Wishbone interface
	// ------------------

	// Ack generation
	always @(posedge clk)
	begin
		if (wb_we & wb_addr[4] & cf_full)
			// Command fifo is full: stall the write until there is room
			wb_ack <= 1'b0;
		else if (~wb_we & (wb_addr == 5'h3) & rf_empty)
			// Blocking read address and no response available yet: stall
			wb_ack <= 1'b0;
		else
			// Normal case: single cycle ack pulse for each access
			wb_ack <= wb_cyc & ~wb_ack;
	end

	// CSR
	// ---

	// Write strobe: acked write outside of the command fifo range
	assign wbi_we_csr = ~wb_addr[4] & wb_we & wb_ack;

	// External control request: bit 1 sets, bit 2 clears (set wins)
	always @(posedge clk)
	begin
		if (rst)
			ectl_req <= 1'b0;
		else if (wbi_we_csr)
			ectl_req <= wb_wdata[1] | (ectl_req & ~wb_wdata[2]);
	end

	// Chip select used for external control
	always @(posedge clk)
	begin
		if (wbi_we_csr)
			ectl_cs <= wb_wdata[5:4];
	end

	// Response overflow clear request (one cycle pulse)
	always @(posedge clk)
		rf_overflow_clr <= wb_wdata[9] & wbi_we_csr;

	// Status derived from the main state machine
	assign ectl_grant = (state == ST_CMD_EXEC);
	assign ectl_idle  = (state == ST_IDLE);

	// Read back value, assembled nibble by nibble
	assign wbi_rd_csr[31:16] = 16'h0000;
	assign wbi_rd_csr[15:12] = { rf_empty, rf_full, rf_overflow, 1'b0 };
	assign wbi_rd_csr[11: 8] = { cf_empty, cf_full, 2'b0 };
	assign wbi_rd_csr[ 7: 4] = { 2'b00, ectl_cs };
	assign wbi_rd_csr[ 3: 0] = { 1'b0, ectl_grant, ectl_req, ectl_idle };

	// Command FIFO write side
	// The low address bits travel along with the data word
	assign { cf_dih, cf_dil } = { wb_addr[3:0], wb_wdata };

	always @(posedge clk)
		cf_wren <= wb_addr[4] & wb_we & wb_cyc & ~wb_ack & ~cf_full;

	// Response FIFO read side
	// Pop is armed one cycle early and executed together with the ack
	always @(posedge clk)
		rf_rden_arm <= wb_addr[1] & ~wb_we & ~rf_empty;

	assign rf_rden   = rf_rden_arm & wb_ack;
	assign wbi_rd_rf = rf_do;

	// Read data mux
	// Output is held at zero unless a cycle is pending and not yet acked
	assign wbi_rd_rst = wb_ack | ~wb_cyc;

	always @(posedge clk)
	begin
		if (wbi_rd_rst)
			wb_rdata <= 32'd0;
		else
			wb_rdata <= wb_addr[1] ? wbi_rd_rf : wbi_rd_csr;
	end

	// FIFOs
	// Both FIFOs share FIFO_DEPTH. The implementation is selected at
	// elaboration time: fifo_sync_ram when FIFO_DEPTH > 4, fifo_sync_shift
	// otherwise. Instance names and connections are identical in both
	// branches.
	generate
		if (FIFO_DEPTH > 4) begin
			// Command
			// 36 bit entries: { wb_addr[3:0], wb_wdata } of the write
			fifo_sync_ram #(
				.DEPTH(FIFO_DEPTH),
				.WIDTH(32+4)
			) cmd_fifo_I (
				.wr_data  ({cf_dih, cf_dil}),
				.wr_ena   (cf_wren),
				.wr_full  (cf_full),
				.rd_data  ({cf_doh, cf_dol}),
				.rd_ena   (cf_rden),
				.rd_empty (cf_empty),
				.clk      (clk),
				.rst      (rst)
			);

			// Response
			// Written through rf_wren_safe so a full FIFO is never pushed
			fifo_sync_ram #(
				.DEPTH(FIFO_DEPTH),
				.WIDTH(32)
			) rsp_fifo_I (
				.wr_data  (rf_di),
				.wr_ena   (rf_wren_safe),
				.wr_full  (rf_full),
				.rd_data  (rf_do),
				.rd_ena   (rf_rden),
				.rd_empty (rf_empty),
				.clk      (clk),
				.rst      (rst)
			);
		end else begin
			// Command
			// 36 bit entries: { wb_addr[3:0], wb_wdata } of the write
			fifo_sync_shift #(
				.DEPTH(FIFO_DEPTH),
				.WIDTH(32+4)
			) cmd_fifo_I (
				.wr_data  ({cf_dih, cf_dil}),
				.wr_ena   (cf_wren),
				.wr_full  (cf_full),
				.rd_data  ({cf_doh, cf_dol}),
				.rd_ena   (cf_rden),
				.rd_empty (cf_empty),
				.clk      (clk),
				.rst      (rst)
			);

			// Response
			// Written through rf_wren_safe so a full FIFO is never pushed
			fifo_sync_shift #(
				.DEPTH(FIFO_DEPTH),
				.WIDTH(32)
			) rsp_fifo_I (
				.wr_data  (rf_di),
				.wr_ena   (rf_wren_safe),
				.wr_full  (rf_full),
				.rd_data  (rf_do),
				.rd_ena   (rf_rden),
				.rd_empty (rf_empty),
				.clk      (clk),
				.rst      (rst)
			);
		end
	endgenerate

	// Response capture
	// The shift-in register is pushed whenever the delayed destination
	// says 'response fifo' (2'b01)
	assign rf_wren = (si_dst_n == 2'b01);
	assign rf_di   = si_data_n;

	// Never push into a full fifo ...
	assign rf_wren_safe = ~rf_full & rf_wren;

	// ... but remember that a response got dropped until software clears it
	always @(posedge clk)
		rf_overflow <= (rf_full & rf_wren) | (~rf_overflow_clr & rf_overflow);


	// Main Control
	// ------------

	// State register
	// Synchronous reset to ST_IDLE, otherwise follows the next-state logic
	always @(posedge clk)
		if (rst)
			state <= ST_IDLE;
		else
			state <= state_nxt;

	// Next-State logic
	//
	//   ST_IDLE        -> ST_MI_RD_DUMMY / ST_MI_WR_DATA on a memory request
	//                     (memory interface has priority over external control)
	//                  -> ST_CMD_EXEC when external control is requested
	//   ST_CMD_EXEC    -> ST_FLUSH once the request is released and the
	//                     command fifo has been drained
	//   ST_MI_WR_DATA  -> ST_FLUSH when the last word has been loaded
	//   ST_MI_RD_DUMMY -> ST_MI_RD_DATA when the dummy phase has been loaded
	//   ST_MI_RD_DATA  -> ST_FLUSH when the last word has been loaded
	//   ST_FLUSH       -> ST_PAUSE when the shifter has nothing left to send
	//   ST_PAUSE       -> ST_IDLE when the pause counter expires
	always @(*)
	begin
		// Default
		state_nxt = state;

		// Transitions ?
		case (state)
			ST_IDLE:
				if (mi_valid)
					state_nxt = mi_rw ? ST_MI_RD_DUMMY : ST_MI_WR_DATA;
				else if (ectl_req)
					state_nxt = ST_CMD_EXEC;

			ST_CMD_EXEC:
				// An empty command fifo only means the last command has been
				// loaded into the shifter, not that it has been sent. Leave
				// through ST_FLUSH so chip select is only released once
				// so_valid drops; ST_PAUSE deasserts it unconditionally.
				if (~ectl_req & cf_empty)
					state_nxt = ST_FLUSH;

			ST_MI_WR_DATA:
				if (xfer_last & so_ld_now)
					state_nxt = ST_FLUSH;

			ST_MI_RD_DUMMY:
				if (so_ld_now)
					state_nxt = ST_MI_RD_DATA;

			ST_MI_RD_DATA:
				if (xfer_last & so_ld_now)
					state_nxt = ST_FLUSH;

			ST_FLUSH:
				if (~so_valid)
					state_nxt = ST_PAUSE;

			ST_PAUSE:
				if (pause_last)
					state_nxt = ST_IDLE;
		endcase
	end

	// Xfer counter
	// Preloaded while idle, counts down on every word loaded during a
	// memory data phase. Underflow (MSB set) flags the last word.
	always @(posedge clk)
		case (state)
			ST_IDLE:
				xfer_cnt <= { 1'b0, mi_len } - 1;

			ST_MI_WR_DATA, ST_MI_RD_DATA:
				if (so_ld_now)
					xfer_cnt <= xfer_cnt - 1;
		endcase

	assign xfer_last = xfer_cnt[7];

	// Pause counter
	// Held at its reload value outside of ST_PAUSE, counts down inside.
	// Underflow (MSB set) ends the pause.
	always @(posedge clk)
		case (state)
			ST_PAUSE: pause_cnt <= pause_cnt - 1;
			default:  pause_cnt <= PAUSE_CLK - 2;
		endcase

	assign pause_last = pause_cnt[3];

	// SPI command
	// One opcode byte per chip select, picked by the addressed chip
	assign mi_spi_cmd = mi_rw ?
		CMD_READ [8*mi_addr_cs +: 8] :
		CMD_WRITE[8*mi_addr_cs +: 8];

	// ROM for command fifo counter
	// Indexed by the 4 address bits stored with each command:
	//   [3:2] == 0 : length field [1:0] counts in steps of 8
	//   otherwise  : length field [1:0] counts in steps of 2
	(* mem2reg *)
	reg [5:0] cmd_len_rom[0:15];

	initial
	begin : rom_cmd_len
		integer idx;
		for (idx=0; idx<16; idx=idx+1)
			cmd_len_rom[idx] = ((idx & 12) == 0) ?
				(8 * (idx & 3) + 7 - PHY_SPEED) :
				(2 * (idx & 3) + 1 - PHY_SPEED);
	end

	// Shift control
	// -------------

	// A new word can be loaded when the shifter is empty or finishing
	assign so_ld_now = so_last | ~so_valid;

	// Description of the word to load, depending on the current state.
	// Fields that are irrelevant in a state are left as don't-care.
	always @(*)
	begin
		// Nothing to load unless a state says otherwise
		so_ld_valid = 1'b0;
		so_ld_src   = 2'bxx;
		so_ld_mode  = 2'bxx;
		so_ld_dst   = 2'bxx;
		so_ld_cnt   = 6'bxxxxxx;

		case (state)
			// Memory request: send command byte + address in QPI
			ST_IDLE: begin
				so_ld_valid = mi_valid;
				so_ld_src   = SO_LD_SRC_MI_CMD;
				so_ld_mode  = SO_MODE_QPI_CMD;
				so_ld_dst   = SO_DST_NONE;
				so_ld_cnt   = (32 / 4) - 1 - PHY_SPEED;
			end

			// External control: command type comes from the fifo entry
			ST_CMD_EXEC: begin
				so_ld_valid = ~cf_empty;
				so_ld_src   = SO_LD_SRC_WB;
				so_ld_cnt   = cmd_len_rom[cf_doh];

				case (cf_doh[3:2])
					2'b00: begin
						so_ld_mode = SO_MODE_SPI;
						so_ld_dst  = SO_DST_WB;
					end
					2'b01: begin
						so_ld_mode = SO_MODE_QPI_RD;
						so_ld_dst  = SO_DST_WB;
					end
					2'b10: begin
						so_ld_mode = SO_MODE_QPI_WR;
						so_ld_dst  = SO_DST_NONE;
					end
					2'b11: begin
						so_ld_mode = SO_MODE_QPI_CMD;
						so_ld_dst  = SO_DST_NONE;
					end
				endcase
			end

			// Memory write burst: one data word per load
			ST_MI_WR_DATA: begin
				so_ld_valid = 1'b1;
				so_ld_src   = SO_LD_SRC_MI_DATA;
				so_ld_mode  = SO_MODE_QPI_WR;
				so_ld_dst   = SO_DST_NONE;
				so_ld_cnt   = (SDW / (4 * PHY_WIDTH)) - 1 - PHY_SPEED;
			end

			// Memory read, dummy cycles: result is discarded
			ST_MI_RD_DUMMY: begin
				so_ld_valid = 1'b1;
				so_ld_mode  = SO_MODE_QPI_RD;
				so_ld_dst   = SO_DST_NONE;
				so_ld_cnt   = DUMMY_CLK - 1 - PHY_SPEED;
			end

			// Memory read burst: result goes to the memory interface
			ST_MI_RD_DATA: begin
				so_ld_valid = 1'b1;
				so_ld_mode  = SO_MODE_QPI_RD;
				so_ld_dst   = SO_DST_MI;
				so_ld_cnt   = (SDW / (4 * PHY_WIDTH)) - 1 - PHY_SPEED;
			end
		endcase
	end

	// Command interface
	// Pop a command each time the shifter takes one in during external control
	assign cf_rden = so_ld_now & ~cf_empty & (state == ST_CMD_EXEC);

	// Memory interface
	// Requests are only accepted while idle
	assign mi_ready = (state == ST_IDLE);

	// Write data is consumed each time the shifter loads during a write burst
	assign mi_wlast = xfer_last;
	assign mi_wack  = so_ld_now & (state == ST_MI_WR_DATA);

	// Read data comes straight from the shift-in register, qualified by
	// the delayed destination bits
	assign mi_rdata = si_data_n[DL:0];
	assign { mi_rstb, mi_rlast } = si_dst_n;

	// Chip select (active low, all released on reset)
	always @(posedge clk)
	begin
		if (rst)
			phy_cs_o <= { N_CS{1'b1} };
		else
			case (state)
				// Pause always keeps every chip deselected
				ST_PAUSE:
					phy_cs_o <= { N_CS{1'b1} };

				// Flush releases the chips once the shifter is done
				ST_FLUSH:
					if (~so_valid)
						phy_cs_o <= { N_CS{1'b1} };

				// Idle selects the target of whatever is about to start
				ST_IDLE: begin
					phy_cs_o <= { N_CS{1'b1} };

					if (mi_valid)
						phy_cs_o[mi_addr_cs] <= 1'b0;
					else if (ectl_req)
						phy_cs_o[ectl_cs] <= 1'b0;
				end
			endcase
	end


	// Shift-Out unit
	// --------------

		//					  Shift								Output
		// SPI mode			: Each chan shifts PHY_SPEED		Output only defined for MOSI
		// QPI read			: n/a                               n/a
		// QPI data mode	: Each chan shifts 4 * PHY_SPEED	Output QPI mode
		// QPI command mode	: Word shifts 4 * PHY_SPEED         chan[1] replicates chan[0]

	// Validity
	// Set by a successful load, cleared when the current word finishes
	// without a new one being loaded
	always @(posedge clk)
	begin
		if (rst)
			so_valid <= 1'b0;
		else
			so_valid <= (so_ld_now & so_ld_valid) | (so_valid & ~so_last);
	end

	// Mode / Read-destination / Counter
	// All captured on load; the counter runs down by PHY_SPEED otherwise
	always @(posedge clk)
	begin
		if (so_ld_now) begin
			so_mode <= so_ld_mode;
			so_dst  <= so_ld_dst;
			so_cnt  <= so_ld_cnt;
		end else begin
			so_cnt  <= so_cnt - PHY_SPEED;
		end
	end

	// Counter underflow marks the last cycle of the current word
	assign so_last = so_cnt[5];

	// Shift register
	// Either loads a new word from the selected source, or advances the
	// current word according to the active mode. Any combination that is
	// not listed leaves the register undefined.
	always @(posedge clk)
	begin
		case (so_ld_now)
			// Load
			1'b1:
				case (so_ld_src)
					SO_LD_SRC_WB:		so_data <= cf_dol;
					SO_LD_SRC_MI_DATA:	so_data <= { mi_wdata, {(STW-SDW){1'b0} } };
					SO_LD_SRC_MI_CMD:	so_data <= { mi_spi_cmd, mi_addr };
					default:			so_data <= {STW{1'bx}};
				endcase

			// Shift
			1'b0:
				case (so_mode)
					SO_MODE_SPI:		so_data <= shift_spi(so_data);
					SO_MODE_QPI_WR:		so_data <= shift_qpi_data(so_data);
					SO_MODE_QPI_CMD:	so_data <= shift_qpi_cmd(so_data);
					default:			so_data <= {STW{1'bx}};
				endcase

			default:
				so_data <= {STW{1'bx}};
		endcase
	end

	// IO control
	//
	// Purely combinational mapping from the shift-out state to the PHY
	// outputs:
	//   phy_clk_o : clock enable per PHY time slot, only while so_valid
	//   phy_io_oe : output enables, selected by the active mode
	//   phy_io_o  : data, mapped from the shift register by the mode helpers
	//
	// Only blocking assignments are used here: mixing blocking and
	// non-blocking assignments to the same variables inside a combinational
	// block gives simulation results that depend on event ordering.
	always @(*)
	begin : io_ctrl
		integer i;

		// Control
		if (so_valid) begin
			// Clock
			// In the last cycle of a word only part of the time slots may
			// still need a clock when PHY_SPEED > 1
			if (PHY_SPEED > 1)
				for (i=0; i<PSW; i=i+1)
					phy_clk_o[i] = ~so_last | (i >= (PHY_SPEED-1-so_cnt[$clog2(PHY_SPEED)-1:0]));
			else
				phy_clk_o = 1'b1;

			// Output Enable
			case (so_mode)
				SO_MODE_SPI:		phy_io_oe = 4'b0001;
				SO_MODE_QPI_RD:		phy_io_oe = 4'b0000;
				SO_MODE_QPI_WR:		phy_io_oe = 4'b1111;
				SO_MODE_QPI_CMD:	phy_io_oe = 4'b1111;
				default:            phy_io_oe = 4'bxxxx;
			endcase
		end else begin
			// Disable all
			phy_clk_o = {PSW{1'b0}};
			phy_io_oe = 4'b0000;
		end

		// Data
		// so_mode[0] picks command vs data mapping for the QPI lines
		if (so_mode[0])
			phy_io_o = shift2phy_qpi_cmd(so_data);
		else
			phy_io_o = shift2phy_qpi_data(so_data);

		// With so_mode[1] clear, the IO0 bits are replaced by the SPI mapping
		if (~so_mode[1])
			phy_io_o = shift2phy_spi(so_data, phy_io_o);
	end


	// Shift-In unit
	// -------------

	// Capture control
	// Shift-in mode follows bit 0 of the shift-out mode
	assign si_mode_0 = so_mode[0];

	// Destination of the word that finishes shifting out this cycle:
	//   2'b00 : nowhere
	//   2'b01 : response fifo
	//   2'b1x : memory interface, bit 0 set when this happens in ST_FLUSH
	always @(posedge clk)
	begin
		if (so_valid & so_last & ~so_mode[1] & so_dst[1])
			si_dst_1 <= so_dst[0] ? { 1'b1, (state == ST_FLUSH) } : 2'b01;
		else
			si_dst_1 <= 2'b00;
	end

	// Delay for PHY pipeline
	delay_bit #(PHY_DELAY)    dly_si_mode (si_mode_0, si_mode_nm1, clk);
	delay_bus #(PHY_DELAY, 2) dly_si_dst  (si_dst_1,  si_dst_n,    clk);

	// Shifter
	// The delayed mode bit picks how incoming PHY data is merged in:
	//   1 : QPI, 4 * PHY_SPEED bits per channel and cycle
	//   0 : SPI,     PHY_SPEED bits per channel and cycle
	always @(posedge clk)
	begin
		case (si_mode_nm1)
			1'b1:    si_data_n <= phy2shift_qpi(si_data_n, phy_io_i);
			default: si_data_n <= phy2shift_spi(si_data_n, phy_io_i);
		endcase
	end

endmodule // qpi_memctrl