66 // User interface read buffer. Reorders read data returned from the
67 // memory controller back to the request order.
68 //
69 // Consists of a large buffer for the data, a status RAM and two counters.
70 //
71 // The large buffer is implemented with distributed RAM in 6 bit wide,
72 // 1 read, 1 write mode. The status RAM is implemented with a distributed
73 // RAM configured as 2 bits wide 1 read/write, 1 read mode.
74 //
75 // As read requests are received from the application, the data_buf_addr
76 // counter supplies the data_buf_addr sent into the memory controller.
77 // With each read request, the counter is incremented, eventually rolling
78 // over. This mechanism labels each read request with an incrementing number.
79 //
80 // When the memory controller returns read data, it echoes the original
81 // data_buf_addr with the read data.
82 //
83 // The status RAM is indexed with the same address as the data buffer
84 // RAM. Each word of the data buffer RAM has an associated status bit
85 // and "end" bit. Requests of size 1 return a data burst on two consecutive
86 // states. Requests of size zero return with a single assertion of rd_data_en.
87 //
88 // Upon returning data, the status and end bits are updated for each
89 // corresponding location in the status RAM indexed by the data_buf_addr
90 // echoed on the rd_data_addr field.
91 //
92 // The other side of the status and data RAMs is indexed by the rd_buf_indx.
93 // The rd_buf_indx constantly monitors the status bit it is currently
94 // pointing to. When the status becomes set to the proper state (more on
95 // this later) read data is returned to the application, and the rd_buf_indx
96 // is incremented.
97 //
98 // At rst the rd_buf_indx is initialized to zero. Data will not have been
99 // returned from the memory controller yet, so there is nothing to return
100 // to the application. Eventually, read requests will be made, and the
101 // memory controller will return the corresponding data. The memory
102 // controller may not return this data in the request order. In which
103 // case, the status bit at location zero, will not indicate
104 // the data for request zero is ready. Eventually, the memory controller
105 // will return data for request zero. The data is forwarded on to the
106 // application, and rd_buf_indx is incremented to point to the next status
107 // bits and data in the buffers. The status bit will be examined, and if
108 // data is valid, this data will be returned as well. This process
109 // continues until the status bit indexed by rd_buf_indx indicates data
110 // is not ready. This may be because the rd_data_buf
111 // is empty, or that some data was returned out of order. Since rd_buf_indx
112 // always increments sequentially, data is always returned to the application
113 // in request order.
114 //
115 // Some further discussion of the status bit is in order. The rd_data_buf
116 // is a circular buffer. The status bit is a single bit. Distributed RAM
117 // supports only a single write port. The write port is consumed by
118 // memory controller read data updates. If a simple '1' were used to
119 // indicate the status, when rd_buf_indx rolled over it would immediately
120 // encounter a one for a request that may not be ready.
121 //
122 // This problem is solved by causing read data returns to flip the
123 // status bit, and adding a high-order bit beyond the size required to
124 // index the rd_data_buf. Data is considered ready when the status bit
125 // and this high-order bit are equal.
126 //
127 // The status RAM needs to be initialized to zero after reset. This is
128 // accomplished by cycling through all rd_buf_indx values and writing a
129 // zero to the status bits directly following deassertion of reset. This
130 // mechanism is used for similar purposes
131 // for the wr_data_buf.
132 //
133 // When ORDERING == "STRICT", read data reordering is unnecessary. For this
134 // case, most of the logic in the block is not generated.
136 `timescale 1 ps / 1 ps
138 // User interface read data.
141  (
     // TCQ: delay value applied as "#TCQ" on the clocked assignments in
     // this module.
142  parameter TCQ = 100,
     // APP_DATA_WIDTH: bit width of rd_data and app_rd_data.
143  parameter APP_DATA_WIDTH = 256,
     // DATA_BUF_ADDR_WIDTH: bit width of rd_data_addr and rd_data_buf_addr_r.
     // When it equals 5, rd_data_offset is not used in the buffer write
     // address.
144  parameter DATA_BUF_ADDR_WIDTH = 5,
     // ECC: any value other than "OFF" also carries ecc_multiple through to
     // app_ecc_multiple_err and widens the read buffer by 2*nCK_PER_CLK bits.
145  parameter ECC = "OFF",
     // nCK_PER_CLK: sets the width of app_ecc_multiple_err (2*nCK_PER_CLK).
146  parameter nCK_PER_CLK = 2 ,
     // ORDERING: "STRICT" generates the pass-through logic; any other value
     // generates the reorder buffer.
147  parameter ORDERING = "NORM"
148  )
149  (/*AUTOARG**/
150  // Outputs
151  ram_init_done_r, ram_init_addr, app_rd_data_valid, app_rd_data_end,
152  app_rd_data, app_ecc_multiple_err, rd_buf_full, rd_data_buf_addr_r,
153  // Inputs
154  rst, clk, rd_data_en, rd_data_addr, rd_data_offset, rd_data_end,
155  rd_data, ecc_multiple, rd_accepted
156  );
     // rst is sampled inside the next-state logic of the clocked registers
     // (no asynchronous reset is used in this module).
158  input rst;
     // All registers in this module are clocked on the rising edge of clk.
159  input clk;
     // High once the post-reset initialization sweep of the RAMs is complete.
161  output wire ram_init_done_r;
     // Low four bits of the read index; counts through the addresses during
     // the initialization sweep.
162  output wire [3:0] ram_init_addr;
164 // rd_buf_indx points to the status and data storage rams for
165 // reading data out to the app.
     // rd_buf_indx_r is 6 bits wide: bits [4:0] address the status and data
     // RAMs, and in the non-STRICT logic bit [5] is compared against the
     // stored status bit to decide whether the addressed entry is ready.
166  reg [5:0] rd_buf_indx_r;
167 (* keep = "true", max_fanout = 10 *) reg ram_init_done_r_lcl /* synthesis syn_maxfan = 10 **/;
168  assign ram_init_done_r = ram_init_done_r_lcl;
     // app_rd_data_valid_ns and single_data are driven inside the ORDERING
     // generate branches further down.
169  wire app_rd_data_valid_ns;
170  wire single_data;
171  reg [5:0] rd_buf_indx_ns;
172  generate begin : rd_buf_indx
     // Advance the index on every cycle while initialization is in progress,
     // and afterwards whenever a word is being handed to the application.
173  wire upd_rd_buf_indx = ~ram_init_done_r_lcl || app_rd_data_valid_ns;
174 // Loop through all status write addresses once after rst. Initializes
175 // the status and pointer RAMs.
     // The done flag is cleared by rst, sets once the low five index bits
     // reach 5'h1f, and then holds itself until the next rst.
176  wire ram_init_done_ns =
177  ~rst && (ram_init_done_r_lcl || (rd_buf_indx_r[4:0] == 5'h1f));
178  always @(posedge clk) ram_init_done_r_lcl <= #TCQ ram_init_done_ns;
     // Next-state logic for the index: hold by default, clear on rst,
     // otherwise step by one. When DATA_BUF_ADDR_WIDTH is not 5, single_data
     // adds one more so that the step becomes two.
180  always @(/*AS**/rd_buf_indx_r or rst or single_data
181  or upd_rd_buf_indx) begin
182  rd_buf_indx_ns = rd_buf_indx_r;
183  if (rst) rd_buf_indx_ns = 6'b0;
184  else if (upd_rd_buf_indx) rd_buf_indx_ns =
185  // need to use every slot of the RAM32M if all address bits are used
186  rd_buf_indx_r + 6'h1 + (DATA_BUF_ADDR_WIDTH == 5 ? 0 : single_data);
187  end
188  always @(posedge clk) rd_buf_indx_r <= #TCQ rd_buf_indx_ns;
189  end
190  endgenerate
     // Only the low four index bits are exported as the init address.
191  assign ram_init_addr = rd_buf_indx_r[3:0];
193  input rd_data_en;
     // rd_data_en qualifies rd_data, rd_data_addr, rd_data_offset and
     // rd_data_end; it enables the buffer write in the non-STRICT logic.
194  input [DATA_BUF_ADDR_WIDTH-1:0] rd_data_addr;
     // rd_data_offset becomes the least significant write-address bit when
     // DATA_BUF_ADDR_WIDTH is not 5; otherwise it is not part of the address.
195  input rd_data_offset;
     // rd_data_end is stored as the "end" status bit and forwarded as
     // app_rd_data_end.
196  input rd_data_end;
197  input [APP_DATA_WIDTH-1:0] rd_data;
198 (* keep = "true", max_fanout = 10 *) output reg app_rd_data_valid /* synthesis syn_maxfan = 10 **/;
199  output reg app_rd_data_end;
200  output reg [APP_DATA_WIDTH-1:0] app_rd_data;
201  input [3:0] ecc_multiple;
     // app_ecc_multiple_err_r is only assigned when ECC is not "OFF";
     // otherwise it keeps its declared initial value of zero.
202  reg [2*nCK_PER_CLK-1:0] app_ecc_multiple_err_r = 'b0;
203  output wire [2*nCK_PER_CLK-1:0] app_ecc_multiple_err;
204  assign app_ecc_multiple_err = app_ecc_multiple_err_r;
     // rd_accepted advances the data_buf_addr counter and, in the non-STRICT
     // logic, increments the occupancy counter.
205  input rd_accepted;
206  output wire rd_buf_full;
207  output wire [DATA_BUF_ADDR_WIDTH-1:0] rd_data_buf_addr_r;
209 // Compute dimensions of read data buffer. Depending on width of
210 // DQ bus and DRAM CK
211 // to fabric ratio, number of RAM32Ms is variable. RAM32Ms are used in
212 // single write, single read, 6 bit wide mode.
     // RD_BUF_WIDTH: application data bits plus, when ECC is enabled,
     // 2*nCK_PER_CLK ECC error bits.
     // NOTE(review): ecc_multiple is declared [3:0], so the widths agree only
     // when nCK_PER_CLK == 2 - confirm before using another ratio with ECC.
213  localparam RD_BUF_WIDTH = APP_DATA_WIDTH + (ECC == "OFF" ? 0 : 2*nCK_PER_CLK);
     // Number of RAM32Ms that are completely filled (6 bits each).
214  localparam FULL_RAM_CNT = (RD_BUF_WIDTH/6);
     // Bits left over after the full RAMs; non-zero means one extra RAM.
215  localparam REMAINDER = RD_BUF_WIDTH % 6;
     // Total RAM32M count, i.e. RD_BUF_WIDTH/6 rounded up.
216  localparam RAM_CNT = FULL_RAM_CNT + ((REMAINDER == 0 ) ? 0 : 1);
     // Width of the buffer data buses, padded up to a multiple of 6.
217  localparam RAM_WIDTH = (RAM_CNT*6);
218  generate
     // STRICT ordering: read data reordering is unnecessary, so the data
     // buffer, status RAM and occupancy counter are not generated.
219  if (ORDERING == "STRICT") begin : strict_mode
     // With valid_ns tied low, the shared read index only advances during
     // the post-reset initialization sweep. The buffer never reports full.
220  assign app_rd_data_valid_ns = 1'b0;
221  assign single_data = 1'b0;
222  assign rd_buf_full = 1'b0;
     // Request tag counter: cleared by rst, incremented by rd_accepted.
223  reg [DATA_BUF_ADDR_WIDTH-1:0] rd_data_buf_addr_r_lcl;
224  wire [DATA_BUF_ADDR_WIDTH-1:0] rd_data_buf_addr_ns =
225  rst
226  ? 0
227  : rd_data_buf_addr_r_lcl + rd_accepted;
228  always @(posedge clk) rd_data_buf_addr_r_lcl <=
229  #TCQ rd_data_buf_addr_ns;
     // NOTE(review): the output is driven from the next-state value here,
     // whereas the not_strict_mode branch drives it from the register -
     // confirm this difference is intended.
230  assign rd_data_buf_addr_r = rd_data_buf_addr_ns;
231 // app_* signals required to be registered.
     // NOTE(review): in the ecc_off branch below the app_* outputs follow the
     // rd_* inputs combinationally; only the ecc_on branch registers them.
232  if (ECC == "OFF") begin : ecc_off
233  always @(/*AS**/rd_data) app_rd_data = rd_data;
234  always @(/*AS**/rd_data_en) app_rd_data_valid = rd_data_en;
235  always @(/*AS**/rd_data_end) app_rd_data_end = rd_data_end;
236  end
237  else begin : ecc_on
     // With ECC enabled the data, valid, end and ECC error flags are each
     // delayed by one clk cycle.
238  always @(posedge clk) app_rd_data <= #TCQ rd_data;
239  always @(posedge clk) app_rd_data_valid <= #TCQ rd_data_en;
240  always @(posedge clk) app_rd_data_end <= #TCQ rd_data_end;
241  always @(posedge clk) app_ecc_multiple_err_r <= #TCQ ecc_multiple;
242  end
243  end
244  else begin : not_strict_mode
     // Buffer write enable: asserted for the whole initialization sweep and
     // afterwards whenever the memory controller returns read data.
245 (* keep = "true", max_fanout = 10 *) wire rd_buf_we = ~ram_init_done_r_lcl || rd_data_en /* synthesis syn_maxfan = 10 **/;
246  // In configurations where read data is returned in a single fabric cycle
247  // the offset is always zero and we can use the bit to get a deeper
248  // FIFO. The RAM32M has 5 address bits, so when the DATA_BUF_ADDR_WIDTH
249  // is set to use them all, discard the offset. Otherwise, include the
250  // offset.
251  wire [4:0] rd_buf_wr_addr = DATA_BUF_ADDR_WIDTH == 5 ?
252  rd_data_addr :
253  {rd_data_addr, rd_data_offset};
     // rd_status is the status RAM entry addressed by the read index:
     // bit [0] is the ready toggle, bit [1] is the stored "end" flag.
254  wire [1:0] rd_status;
255 // Instantiate status RAM. One bit for status and one for "end".
256  begin : status_ram
257 // Turns out read to write back status is a timing path. Update
258 // the status in the ram on the state following the read. Bypass
259 // the write data into the status read path.
     // Address for the pending status update: the read index during the
     // initialization sweep, the returned data's buffer address afterwards.
260  wire [4:0] status_ram_wr_addr_ns = ram_init_done_r_lcl
261  ? rd_buf_wr_addr
262  : rd_buf_indx_r[4:0];
263  reg [4:0] status_ram_wr_addr_r;
264  always @(posedge clk) status_ram_wr_addr_r <=
265  #TCQ status_ram_wr_addr_ns;
     // wr_status is the entry currently stored at the pending write address
     // (read through port C of the RAM below).
266  wire [1:0] wr_status;
267 // Not guaranteed to write second status bit. If it is written, always
268 // copy in the first status bit.
     // wr_status_r1 holds wr_status[0] from the previous cycle; it is the
     // value that gets inverted when rd_data_offset is set.
269  reg wr_status_r1;
270  always @(posedge clk) wr_status_r1 <= #TCQ wr_status[0];
     // New entry: {end flag, inverted status bit}. During the initialization
     // sweep both bits are written as zero.
271  wire [1:0] status_ram_wr_data_ns =
272  ram_init_done_r_lcl
273  ? {rd_data_end, ~(rd_data_offset
274  ? wr_status_r1
275  : wr_status[0])}
276  : 2'b0;
277  reg [1:0] status_ram_wr_data_r;
278  always @(posedge clk) status_ram_wr_data_r <=
279  #TCQ status_ram_wr_data_ns;
     // Write enable is delayed by one cycle so that it lines up with the
     // registered write address and write data.
280  reg rd_buf_we_r1;
281  always @(posedge clk) rd_buf_we_r1 <= #TCQ rd_buf_we;
     // Port usage as wired here: A reads at the read index (rd_status),
     // C reads at the pending write address (wr_status), B is unused, and
     // D carries the registered write address.
     // NOTE(review): ADDRD is presumably the write address shared by all
     // ports - confirm against the RAM32M primitive documentation.
282  RAM32M
283  #(.INIT_A(64'h0000000000000000),
284  .INIT_B(64'h0000000000000000),
285  .INIT_C(64'h0000000000000000),
286  .INIT_D(64'h0000000000000000)
287  ) RAM32M0 (
288  .DOA(rd_status),
289  .DOB(),
290  .DOC(wr_status),
291  .DOD(),
292  .DIA(status_ram_wr_data_r),
293  .DIB(2'b0),
294  .DIC(status_ram_wr_data_r),
295  .DID(status_ram_wr_data_r),
296  .ADDRA(rd_buf_indx_r[4:0]),
297  .ADDRB(5'b0),
298  .ADDRC(status_ram_wr_addr_ns),
299  .ADDRD(status_ram_wr_addr_r),
300  .WE(rd_buf_we_r1),
301  .WCLK(clk)
302  );
303  end // block: status_ram
305  wire [RAM_WIDTH-1:0] rd_buf_out_data;
306  begin : rd_buf
     // Build the RAM write word: rd_data in the low bits, ecc_multiple above
     // it when ECC is enabled, and zero padding up to RAM_WIDTH when the
     // buffer width is not a multiple of 6.
307  wire [RAM_WIDTH-1:0] rd_buf_in_data;
308  if (REMAINDER == 0)
309  if (ECC == "OFF")
310  assign rd_buf_in_data = rd_data;
311  else
312  assign rd_buf_in_data = {ecc_multiple, rd_data};
313  else
314  if (ECC == "OFF")
315  assign rd_buf_in_data = {{6-REMAINDER{1'b0}}, rd_data};
316  else
317  assign rd_buf_in_data =
318  {{6-REMAINDER{1'b0}}, ecc_multiple, rd_data};
320  // Dedicated copy for driving distributed RAM.
     // Loaded from the same next-state value as rd_buf_indx_r, so it carries
     // the same low five bits as the read index.
321  (* keep = "true" *) reg [4:0] rd_buf_indx_copy_r /* synthesis syn_keep = 1 **/;
322  always @(posedge clk) rd_buf_indx_copy_r <= #TCQ rd_buf_indx_ns[4:0];
     // One RAM32M per 6 bits of buffer width. Ports A, B and C each carry two
     // data bits and are read at the read index copy; port D's data input is
     // tied off and its address input carries the write address.
324  genvar i;
325  for (i=0; i<RAM_CNT; i=i+1) begin : rd_buffer_ram
326  RAM32M
327  #(.INIT_A(64'h0000000000000000),
328  .INIT_B(64'h0000000000000000),
329  .INIT_C(64'h0000000000000000),
330  .INIT_D(64'h0000000000000000)
331  ) RAM32M0 (
332  .DOA(rd_buf_out_data[((i*6)+4)+:2]),
333  .DOB(rd_buf_out_data[((i*6)+2)+:2]),
334  .DOC(rd_buf_out_data[((i*6)+0)+:2]),
335  .DOD(),
336  .DIA(rd_buf_in_data[((i*6)+4)+:2]),
337  .DIB(rd_buf_in_data[((i*6)+2)+:2]),
338  .DIC(rd_buf_in_data[((i*6)+0)+:2]),
339  .DID(2'b0),
340  .ADDRA(rd_buf_indx_copy_r[4:0]),
341  .ADDRB(rd_buf_indx_copy_r[4:0]),
342  .ADDRC(rd_buf_indx_copy_r[4:0]),
343  .ADDRD(rd_buf_wr_addr),
344  .WE(rd_buf_we),
345  .WCLK(clk)
346  );
347  end // block: rd_buffer_ram
348  end
350  wire rd_data_rdy = (rd_status[0] == rd_buf_indx_r[5]);
     // rd_data_rdy (above): the entry at the read index is ready when its
     // stored status bit equals the high-order bit of the read index.
     // bypass: data arriving this cycle is addressed to the very entry the
     // read index points at, so it is forwarded without waiting for the RAM.
351 (* keep = "true", max_fanout = 10 *) wire bypass = rd_data_en && (rd_buf_wr_addr[4:0] == rd_buf_indx_r[4:0]) /* synthesis syn_maxfan = 10 **/;
     // Nothing is presented to the application until initialization is done.
352  assign app_rd_data_valid_ns =
353  ram_init_done_r_lcl && (bypass || rd_data_rdy);
     // The end flag comes from the incoming data on bypass, otherwise from
     // the stored status entry.
354  wire app_rd_data_end_ns = bypass ? rd_data_end : rd_status[1];
355  always @(posedge clk) app_rd_data_valid <= #TCQ app_rd_data_valid_ns;
356  always @(posedge clk) app_rd_data_end <= #TCQ app_rd_data_end_ns;
     // single_data: a word flagged as "end" is being returned from an even
     // index. The index logic uses this to step by two when
     // DATA_BUF_ADDR_WIDTH is not 5.
358  assign single_data =
359  app_rd_data_valid_ns && app_rd_data_end_ns && ~rd_buf_indx_r[0];
     // Output data: incoming rd_data on bypass, else the buffered word.
361  wire [APP_DATA_WIDTH-1:0] app_rd_data_ns =
362  bypass
363  ? rd_data
364  : rd_buf_out_data[APP_DATA_WIDTH-1:0];
365  always @(posedge clk) app_rd_data <= #TCQ app_rd_data_ns;
     // With ECC enabled, the four ECC error bits stored directly above the
     // data bits follow the same bypass/buffer selection.
366  if (ECC != "OFF") begin : assign_app_ecc_multiple
367  wire [3:0] app_ecc_multiple_err_ns =
368  bypass
369  ? ecc_multiple
370  : rd_buf_out_data[APP_DATA_WIDTH+:4];
371  always @(posedge clk) app_ecc_multiple_err_r <=
372  #TCQ app_ecc_multiple_err_ns;
373  end
375  //Added to fix timing. The signal app_rd_data_valid has
376  //a very high fanout. So making a dedicated copy for usage
377  //with the occ_cnt counter.
378  (* equivalent_register_removal = "no" *)
379  reg app_rd_data_valid_copy;
380  always @(posedge clk) app_rd_data_valid_copy <= #TCQ app_rd_data_valid_ns;
381 // Keep track of how many entries in the queue hold data.
     // An entry is claimed when a read is accepted (rd_accepted) and released
     // when the registered valid copy and app_rd_data_end are both asserted.
382  wire free_rd_buf = app_rd_data_valid_copy && app_rd_data_end; //changed to use registered version
383  //of the signals in order to fix timing
     // The counter is one bit wider than the buffer address so that its top
     // bit can serve directly as the "full" indication.
384  reg [DATA_BUF_ADDR_WIDTH:0] occ_cnt_r;
385  wire [DATA_BUF_ADDR_WIDTH:0] occ_minus_one = occ_cnt_r - 1;
386  wire [DATA_BUF_ADDR_WIDTH:0] occ_plus_one = occ_cnt_r + 1;
387  begin : occupied_counter
388  reg [DATA_BUF_ADDR_WIDTH:0] occ_cnt_ns;
     // Next-state logic: clear on rst; count down on a release alone, up on
     // an accept alone, and hold when neither or both happen.
389  always @(/*AS**/free_rd_buf or occ_cnt_r or rd_accepted or rst or occ_minus_one or occ_plus_one) begin
390  occ_cnt_ns = occ_cnt_r;
391  if (rst) occ_cnt_ns = 0;
392  else case ({rd_accepted, free_rd_buf})
393  2'b01 : occ_cnt_ns = occ_minus_one;
394  2'b10 : occ_cnt_ns = occ_plus_one;
     default : occ_cnt_ns = occ_cnt_r; // 2'b00 and 2'b11: no net change
395  endcase // case ({rd_accepted, free_rd_buf})
396  end
397  always @(posedge clk) occ_cnt_r <= #TCQ occ_cnt_ns;
     // Full is taken from the next-state value, so it asserts in the same
     // cycle as the accept that fills the buffer.
398  assign rd_buf_full = occ_cnt_ns[DATA_BUF_ADDR_WIDTH];
400 `ifdef MC_SVA
     // Occupancy landmarks derived from DATA_BUF_ADDR_WIDTH. The previous
     // hard-coded values ('hf, 'h10, 'h11, 'h1f) were only correct for a
     // 4-bit buffer address; with a 5-bit address the overflow check fired
     // on the legal 16 -> 17 transition and the underflow check could never
     // match the real wrap value.
     localparam [DATA_BUF_ADDR_WIDTH:0] OCC_FULL =
       {1'b1, {DATA_BUF_ADDR_WIDTH{1'b0}}};
     localparam [DATA_BUF_ADDR_WIDTH:0] OCC_FULL_MINUS_1 =
       {1'b0, {DATA_BUF_ADDR_WIDTH{1'b1}}};
     localparam [DATA_BUF_ADDR_WIDTH:0] OCC_FULL_PLUS_1 = OCC_FULL | 1'b1;
     localparam [DATA_BUF_ADDR_WIDTH:0] OCC_WRAPPED =
       {(DATA_BUF_ADDR_WIDTH+1){1'b1}};
401  rd_data_buffer_full: cover property (@(posedge clk) (~rst && rd_buf_full));
     // Simultaneous accept and release with one free entry left.
402  rd_data_buffer_inc_dec_15: cover property (@(posedge clk)
403  (~rst && rd_accepted && free_rd_buf && (occ_cnt_r == OCC_FULL_MINUS_1)));
     // The counter must never decrement from empty (wrap to all ones).
404  rd_data_underflow: assert property (@(posedge clk)
405  (rst || !((occ_cnt_r == 'b0) && (occ_cnt_ns == OCC_WRAPPED))));
     // The counter must never increment past full.
406  rd_data_overflow: assert property (@(posedge clk)
407  (rst || !((occ_cnt_r == OCC_FULL) && (occ_cnt_ns == OCC_FULL_PLUS_1))));
408 `endif
409  end // block: occupied_counter
412 // Generate the data_buf_address written into the memory controller
413 // for reads. Increment with each accepted read, and roll over when the
     // DATA_BUF_ADDR_WIDTH-bit counter wraps.
414  reg [DATA_BUF_ADDR_WIDTH-1:0] rd_data_buf_addr_r_lcl;
     // In this branch the output is the registered counter value.
415  assign rd_data_buf_addr_r = rd_data_buf_addr_r_lcl;
416  begin : data_buf_addr
417  reg [DATA_BUF_ADDR_WIDTH-1:0] rd_data_buf_addr_ns;
     // Next-state logic: hold by default, clear on rst, otherwise add one
     // for each accepted read.
418  always @(/*AS**/rd_accepted or rd_data_buf_addr_r_lcl or rst) begin
419  rd_data_buf_addr_ns = rd_data_buf_addr_r_lcl;
420  if (rst) rd_data_buf_addr_ns = 0;
421  else if (rd_accepted) rd_data_buf_addr_ns =
422  rd_data_buf_addr_r_lcl + 1;
423  end
424  always @(posedge clk) rd_data_buf_addr_r_lcl <=
425  #TCQ rd_data_buf_addr_ns;
426  end // block: data_buf_addr
427  end // block: not_strict_mode
428  endgenerate
430 endmodule // ui_rd_data
432 // Local Variables:
433 // verilog-library-directories:(".")
434 // End: