1 /***********************license start***************
2 * Copyright (c) 2003-2010 Cavium Networks (support@cavium.com). All rights
3 * reserved.
4 *
5 *
6 * Redistribution and use in source and binary forms, with or without
7 * modification, are permitted provided that the following conditions are
8 * met:
9 *
10 * * Redistributions of source code must retain the above copyright
11 * notice, this list of conditions and the following disclaimer.
12 *
13 * * Redistributions in binary form must reproduce the above
14 * copyright notice, this list of conditions and the following
15 * disclaimer in the documentation and/or other materials provided
16 * with the distribution.
17
18 * * Neither the name of Cavium Networks nor the names of
19 * its contributors may be used to endorse or promote products
20 * derived from this software without specific prior written
21 * permission.
22
23 * This Software, including technical data, may be subject to U.S. export control
24 * laws, including the U.S. Export Administration Act and its associated
25 * regulations, and may be subject to export or import regulations in other
26 * countries.
27
28 * TO THE MAXIMUM EXTENT PERMITTED BY LAW, THE SOFTWARE IS PROVIDED "AS IS"
29 * AND WITH ALL FAULTS AND CAVIUM NETWORKS MAKES NO PROMISES, REPRESENTATIONS OR
30 * WARRANTIES, EITHER EXPRESS, IMPLIED, STATUTORY, OR OTHERWISE, WITH RESPECT TO
31 * THE SOFTWARE, INCLUDING ITS CONDITION, ITS CONFORMITY TO ANY REPRESENTATION OR
32 * DESCRIPTION, OR THE EXISTENCE OF ANY LATENT OR PATENT DEFECTS, AND CAVIUM
33 * SPECIFICALLY DISCLAIMS ALL IMPLIED (IF ANY) WARRANTIES OF TITLE,
34 * MERCHANTABILITY, NONINFRINGEMENT, FITNESS FOR A PARTICULAR PURPOSE, LACK OF
35 * VIRUSES, ACCURACY OR COMPLETENESS, QUIET ENJOYMENT, QUIET POSSESSION OR
36 * CORRESPONDENCE TO DESCRIPTION. THE ENTIRE RISK ARISING OUT OF USE OR
37 * PERFORMANCE OF THE SOFTWARE LIES WITH YOU.
38 ***********************license end**************************************/
39
40
41
42
43
44
45
/**
 * @file
 *
 * Interface to the PCI / PCIe DMA engines. These are only available
 * on chips with PCI / PCIe.
 *
 * <hr>$Revision: 50126 $<hr>
 */
54 #include "executive-config.h"
55 #include "cvmx-config.h"
56 #include "cvmx.h"
57 #include "cvmx-cmd-queue.h"
58 #include "cvmx-dma-engine.h"
59
60 #ifdef CVMX_ENABLE_PKO_FUNCTIONS
61
62 /**
63 * Return the number of DMA engimes supported by this chip
64 *
65 * @return Number of DMA engines
66 */
cvmx_dma_engine_get_num(void)67 int cvmx_dma_engine_get_num(void)
68 {
69 if (octeon_has_feature(OCTEON_FEATURE_NPEI))
70 {
71 if (OCTEON_IS_MODEL(OCTEON_CN52XX_PASS1_X))
72 return 4;
73 else
74 return 5;
75 }
76 else if (octeon_has_feature(OCTEON_FEATURE_PCIE))
77 return 8;
78 else
79 return 2;
80 }
81
82 /**
83 * Initialize the DMA engines for use
84 *
85 * @return Zero on success, negative on failure
86 */
cvmx_dma_engine_initialize(void)87 int cvmx_dma_engine_initialize(void)
88 {
89 int engine;
90
91 for (engine=0; engine < cvmx_dma_engine_get_num(); engine++)
92 {
93 cvmx_cmd_queue_result_t result;
94 result = cvmx_cmd_queue_initialize(CVMX_CMD_QUEUE_DMA(engine),
95 0, CVMX_FPA_OUTPUT_BUFFER_POOL,
96 CVMX_FPA_OUTPUT_BUFFER_POOL_SIZE);
97 if (result != CVMX_CMD_QUEUE_SUCCESS)
98 return -1;
99 if (octeon_has_feature(OCTEON_FEATURE_NPEI))
100 {
101 cvmx_npei_dmax_ibuff_saddr_t dmax_ibuff_saddr;
102 dmax_ibuff_saddr.u64 = 0;
103 dmax_ibuff_saddr.s.saddr = cvmx_ptr_to_phys(cvmx_cmd_queue_buffer(CVMX_CMD_QUEUE_DMA(engine))) >> 7;
104 cvmx_write_csr(CVMX_PEXP_NPEI_DMAX_IBUFF_SADDR(engine), dmax_ibuff_saddr.u64);
105 }
106 else if (octeon_has_feature(OCTEON_FEATURE_PCIE))
107 {
108 cvmx_dpi_dmax_ibuff_saddr_t dpi_dmax_ibuff_saddr;
109 dpi_dmax_ibuff_saddr.u64 = 0;
110 dpi_dmax_ibuff_saddr.s.csize = CVMX_FPA_OUTPUT_BUFFER_POOL_SIZE/8;
111 dpi_dmax_ibuff_saddr.s.saddr = cvmx_ptr_to_phys(cvmx_cmd_queue_buffer(CVMX_CMD_QUEUE_DMA(engine))) >> 7;
112 cvmx_write_csr(CVMX_DPI_DMAX_IBUFF_SADDR(engine), dpi_dmax_ibuff_saddr.u64);
113 }
114 else
115 {
116 uint64_t address = cvmx_ptr_to_phys(cvmx_cmd_queue_buffer(CVMX_CMD_QUEUE_DMA(engine)));
117 if (engine)
118 cvmx_write_csr(CVMX_NPI_HIGHP_IBUFF_SADDR, address);
119 else
120 cvmx_write_csr(CVMX_NPI_LOWP_IBUFF_SADDR, address);
121 }
122 }
123
124 if (octeon_has_feature(OCTEON_FEATURE_NPEI))
125 {
126 cvmx_npei_dma_control_t dma_control;
127 dma_control.u64 = 0;
128 if (cvmx_dma_engine_get_num() >= 5)
129 dma_control.s.dma4_enb = 1;
130 dma_control.s.dma3_enb = 1;
131 dma_control.s.dma2_enb = 1;
132 dma_control.s.dma1_enb = 1;
133 dma_control.s.dma0_enb = 1;
134 dma_control.s.o_mode = 1; /* Pull NS and RO from this register, not the pointers */
135 //dma_control.s.dwb_denb = 1;
136 //dma_control.s.dwb_ichk = CVMX_FPA_OUTPUT_BUFFER_POOL_SIZE/128;
137 dma_control.s.fpa_que = CVMX_FPA_OUTPUT_BUFFER_POOL;
138 dma_control.s.csize = CVMX_FPA_OUTPUT_BUFFER_POOL_SIZE/8;
139 cvmx_write_csr(CVMX_PEXP_NPEI_DMA_CONTROL, dma_control.u64);
140 /* As a workaround for errata PCIE-811 we only allow a single
141 outstanding DMA read over PCIe at a time. This limits performance,
142 but works in all cases. If you need higher performance, remove
143 this code and implement the more complicated workaround documented
144 in the errata. This only affects CN56XX pass 2.0 chips */
145 if (OCTEON_IS_MODEL(OCTEON_CN56XX_PASS2_0))
146 {
147 cvmx_npei_dma_pcie_req_num_t pcie_req_num;
148 pcie_req_num.u64 = cvmx_read_csr(CVMX_PEXP_NPEI_DMA_PCIE_REQ_NUM);
149 pcie_req_num.s.dma_cnt = 1;
150 cvmx_write_csr(CVMX_PEXP_NPEI_DMA_PCIE_REQ_NUM, pcie_req_num.u64);
151 }
152 }
153 else if (octeon_has_feature(OCTEON_FEATURE_PCIE))
154 {
155 cvmx_dpi_engx_buf_t dpi_engx_buf;
156 cvmx_dpi_dma_control_t dma_control;
157 cvmx_dpi_ctl_t dpi_ctl;
158
159 /* Give engine 0-4 1KB, and 5 3KB. This gives the packet engines better
160 performance. Total must not exceed 8KB */
161 dpi_engx_buf.u64 = 0;
162 dpi_engx_buf.s.blks = 2;
163 cvmx_write_csr(CVMX_DPI_ENGX_BUF(0), dpi_engx_buf.u64);
164 cvmx_write_csr(CVMX_DPI_ENGX_BUF(1), dpi_engx_buf.u64);
165 cvmx_write_csr(CVMX_DPI_ENGX_BUF(2), dpi_engx_buf.u64);
166 cvmx_write_csr(CVMX_DPI_ENGX_BUF(3), dpi_engx_buf.u64);
167 cvmx_write_csr(CVMX_DPI_ENGX_BUF(4), dpi_engx_buf.u64);
168 dpi_engx_buf.s.blks = 6;
169 cvmx_write_csr(CVMX_DPI_ENGX_BUF(5), dpi_engx_buf.u64);
170
171 dma_control.u64 = cvmx_read_csr(CVMX_DPI_DMA_CONTROL);
172 dma_control.s.pkt_hp = 1;
173 dma_control.s.pkt_en = 1;
174 dma_control.s.dma_enb = 0x1f;
175 dma_control.s.dwb_denb = 1;
176 dma_control.s.dwb_ichk = CVMX_FPA_OUTPUT_BUFFER_POOL_SIZE/128;
177 dma_control.s.fpa_que = CVMX_FPA_OUTPUT_BUFFER_POOL;
178 dma_control.s.o_mode = 1;
179 cvmx_write_csr(CVMX_DPI_DMA_CONTROL, dma_control.u64);
180 dpi_ctl.u64 = cvmx_read_csr(CVMX_DPI_CTL);
181 dpi_ctl.s.en = 1;
182 cvmx_write_csr(CVMX_DPI_CTL, dpi_ctl.u64);
183 }
184 else
185 {
186 cvmx_npi_dma_control_t dma_control;
187 dma_control.u64 = 0;
188 //dma_control.s.dwb_denb = 1;
189 //dma_control.s.dwb_ichk = CVMX_FPA_OUTPUT_BUFFER_POOL_SIZE/128;
190 dma_control.s.o_add1 = 1;
191 dma_control.s.fpa_que = CVMX_FPA_OUTPUT_BUFFER_POOL;
192 dma_control.s.hp_enb = 1;
193 dma_control.s.lp_enb = 1;
194 dma_control.s.csize = CVMX_FPA_OUTPUT_BUFFER_POOL_SIZE/8;
195 cvmx_write_csr(CVMX_NPI_DMA_CONTROL, dma_control.u64);
196 }
197
198 return 0;
199 }
200
201
202 /**
203 * Shutdown all DMA engines. The engeines must be idle when this
204 * function is called.
205 *
206 * @return Zero on success, negative on failure
207 */
cvmx_dma_engine_shutdown(void)208 int cvmx_dma_engine_shutdown(void)
209 {
210 int engine;
211
212 for (engine=0; engine < cvmx_dma_engine_get_num(); engine++)
213 {
214 if (cvmx_cmd_queue_length(CVMX_CMD_QUEUE_DMA(engine)))
215 {
216 cvmx_dprintf("ERROR: cvmx_dma_engine_shutdown: Engine not idle.\n");
217 return -1;
218 }
219 }
220
221 if (octeon_has_feature(OCTEON_FEATURE_NPEI))
222 {
223 cvmx_npei_dma_control_t dma_control;
224 dma_control.u64 = cvmx_read_csr(CVMX_PEXP_NPEI_DMA_CONTROL);
225 if (cvmx_dma_engine_get_num() >= 5)
226 dma_control.s.dma4_enb = 0;
227 dma_control.s.dma3_enb = 0;
228 dma_control.s.dma2_enb = 0;
229 dma_control.s.dma1_enb = 0;
230 dma_control.s.dma0_enb = 0;
231 cvmx_write_csr(CVMX_PEXP_NPEI_DMA_CONTROL, dma_control.u64);
232 /* Make sure the disable completes */
233 cvmx_read_csr(CVMX_PEXP_NPEI_DMA_CONTROL);
234 }
235 else if (octeon_has_feature(OCTEON_FEATURE_PCIE))
236 {
237 cvmx_dpi_dma_control_t dma_control;
238 dma_control.u64 = cvmx_read_csr(CVMX_DPI_DMA_CONTROL);
239 dma_control.s.dma_enb = 0;
240 cvmx_write_csr(CVMX_DPI_DMA_CONTROL, dma_control.u64);
241 /* Make sure the disable completes */
242 cvmx_read_csr(CVMX_DPI_DMA_CONTROL);
243 }
244 else
245 {
246 cvmx_npi_dma_control_t dma_control;
247 dma_control.u64 = cvmx_read_csr(CVMX_NPI_DMA_CONTROL);
248 dma_control.s.hp_enb = 0;
249 dma_control.s.lp_enb = 0;
250 cvmx_write_csr(CVMX_NPI_DMA_CONTROL, dma_control.u64);
251 /* Make sure the disable completes */
252 cvmx_read_csr(CVMX_NPI_DMA_CONTROL);
253 }
254
255 for (engine=0; engine < cvmx_dma_engine_get_num(); engine++)
256 {
257 cvmx_cmd_queue_shutdown(CVMX_CMD_QUEUE_DMA(engine));
258 if (octeon_has_feature(OCTEON_FEATURE_NPEI))
259 cvmx_write_csr(CVMX_PEXP_NPEI_DMAX_IBUFF_SADDR(engine), 0);
260 else if (octeon_has_feature(OCTEON_FEATURE_PCIE))
261 cvmx_write_csr(CVMX_DPI_DMAX_IBUFF_SADDR(engine), 0);
262 else
263 {
264 if (engine)
265 cvmx_write_csr(CVMX_NPI_HIGHP_IBUFF_SADDR, 0);
266 else
267 cvmx_write_csr(CVMX_NPI_LOWP_IBUFF_SADDR, 0);
268 }
269 }
270
271 return 0;
272 }
273
274
275 /**
276 * Submit a series of DMA comamnd to the DMA engines.
277 *
278 * @param engine Engine to submit to (0 to cvmx_dma_engine_get_num()-1)
279 * @param header Command header
280 * @param num_buffers
281 * The number of data pointers
282 * @param buffers Comamnd data pointers
283 *
284 * @return Zero on success, negative on failure
285 */
cvmx_dma_engine_submit(int engine,cvmx_dma_engine_header_t header,int num_buffers,cvmx_dma_engine_buffer_t buffers[])286 int cvmx_dma_engine_submit(int engine, cvmx_dma_engine_header_t header, int num_buffers, cvmx_dma_engine_buffer_t buffers[])
287 {
288 cvmx_cmd_queue_result_t result;
289 int cmd_count = 1;
290 uint64_t cmds[num_buffers + 1];
291
292 if (OCTEON_IS_MODEL(OCTEON_CN56XX_PASS1_X))
293 {
294 /* Check for Errata PCIe-604 */
295 if ((header.s.nfst > 11) || (header.s.nlst > 11) || (header.s.nfst + header.s.nlst > 15))
296 {
297 cvmx_dprintf("DMA engine submit too large\n");
298 return -1;
299 }
300 }
301
302 cmds[0] = header.u64;
303 while (num_buffers--)
304 {
305 cmds[cmd_count++] = buffers->u64;
306 buffers++;
307 }
308
309 /* Due to errata PCIE-13315, it is necessary to have the queue lock while we
310 ring the doorbell for the DMA engines. This prevents doorbells from
311 possibly arriving out of order with respect to the command queue
312 entries */
313 __cvmx_cmd_queue_lock(CVMX_CMD_QUEUE_DMA(engine), __cvmx_cmd_queue_get_state(CVMX_CMD_QUEUE_DMA(engine)));
314 result = cvmx_cmd_queue_write(CVMX_CMD_QUEUE_DMA(engine), 0, cmd_count, cmds);
315 /* This SYNCWS is needed since the command queue didn't do locking, which
316 normally implies the SYNCWS. This one makes sure the command queue
317 updates make it to L2 before we ring the doorbell */
318 CVMX_SYNCWS;
319 /* A syncw isn't needed here since the command queue did one as part of the queue unlock */
320 if (cvmx_likely(result == CVMX_CMD_QUEUE_SUCCESS))
321 {
322 if (octeon_has_feature(OCTEON_FEATURE_NPEI))
323 {
324 /* DMA doorbells are 32bit writes in little endian space. This means we need to xor the address with 4 */
325 cvmx_write64_uint32(CVMX_PEXP_NPEI_DMAX_DBELL(engine)^4, cmd_count);
326 }
327 else if (octeon_has_feature(OCTEON_FEATURE_PCIE))
328 cvmx_write_csr(CVMX_DPI_DMAX_DBELL(engine), cmd_count);
329 else
330 {
331 if (engine)
332 cvmx_write_csr(CVMX_NPI_HIGHP_DBELL, cmd_count);
333 else
334 cvmx_write_csr(CVMX_NPI_LOWP_DBELL, cmd_count);
335 }
336 }
337 /* Here is the unlock for the above errata workaround */
338 __cvmx_cmd_queue_unlock(__cvmx_cmd_queue_get_state(CVMX_CMD_QUEUE_DMA(engine)));
339 return result;
340 }
341
342
343 /**
344 * @INTERNAL
345 * Function used by cvmx_dma_engine_transfer() to build the
346 * internal address list.
347 *
348 * @param buffers Location to store the list
349 * @param address Address to build list for
350 * @param size Length of the memory pointed to by address
351 *
352 * @return Number of internal pointer chunks created
353 */
__cvmx_dma_engine_build_internal_pointers(cvmx_dma_engine_buffer_t * buffers,uint64_t address,int size)354 static inline int __cvmx_dma_engine_build_internal_pointers(cvmx_dma_engine_buffer_t *buffers, uint64_t address, int size)
355 {
356 int segments = 0;
357 while (size)
358 {
359 /* Each internal chunk can contain a maximum of 8191 bytes */
360 int chunk = size;
361 if (chunk > 8191)
362 chunk = 8191;
363 buffers[segments].u64 = 0;
364 buffers[segments].internal.size = chunk;
365 buffers[segments].internal.addr = address;
366 address += chunk;
367 size -= chunk;
368 segments++;
369 }
370 return segments;
371 }
372
373
374 /**
375 * @INTERNAL
376 * Function used by cvmx_dma_engine_transfer() to build the PCI / PCIe address
377 * list.
378 * @param buffers Location to store the list
379 * @param address Address to build list for
380 * @param size Length of the memory pointed to by address
381 *
382 * @return Number of PCI / PCIe address chunks created. The number of words used
383 * will be segments + (segments-1)/4 + 1.
384 */
__cvmx_dma_engine_build_external_pointers(cvmx_dma_engine_buffer_t * buffers,uint64_t address,int size)385 static inline int __cvmx_dma_engine_build_external_pointers(cvmx_dma_engine_buffer_t *buffers, uint64_t address, int size)
386 {
387 const int MAX_SIZE = 65535;
388 int segments = 0;
389 while (size)
390 {
391 /* Each block of 4 PCI / PCIe pointers uses one dword for lengths followed by
392 up to 4 addresses. This then repeats if more data is needed */
393 buffers[0].u64 = 0;
394 if (size <= MAX_SIZE)
395 {
396 /* Only one more segment needed */
397 buffers[0].pcie_length.len0 = size;
398 buffers[1].u64 = address;
399 segments++;
400 break;
401 }
402 else if (size <= MAX_SIZE * 2)
403 {
404 /* Two more segments needed */
405 buffers[0].pcie_length.len0 = MAX_SIZE;
406 buffers[0].pcie_length.len1 = size - MAX_SIZE;
407 buffers[1].u64 = address;
408 address += MAX_SIZE;
409 buffers[2].u64 = address;
410 segments+=2;
411 break;
412 }
413 else if (size <= MAX_SIZE * 3)
414 {
415 /* Three more segments needed */
416 buffers[0].pcie_length.len0 = MAX_SIZE;
417 buffers[0].pcie_length.len1 = MAX_SIZE;
418 buffers[0].pcie_length.len2 = size - MAX_SIZE * 2;
419 buffers[1].u64 = address;
420 address += MAX_SIZE;
421 buffers[2].u64 = address;
422 address += MAX_SIZE;
423 buffers[3].u64 = address;
424 segments+=3;
425 break;
426 }
427 else if (size <= MAX_SIZE * 4)
428 {
429 /* Four more segments needed */
430 buffers[0].pcie_length.len0 = MAX_SIZE;
431 buffers[0].pcie_length.len1 = MAX_SIZE;
432 buffers[0].pcie_length.len2 = MAX_SIZE;
433 buffers[0].pcie_length.len3 = size - MAX_SIZE * 3;
434 buffers[1].u64 = address;
435 address += MAX_SIZE;
436 buffers[2].u64 = address;
437 address += MAX_SIZE;
438 buffers[3].u64 = address;
439 address += MAX_SIZE;
440 buffers[4].u64 = address;
441 segments+=4;
442 break;
443 }
444 else
445 {
446 /* Five or more segments are needed */
447 buffers[0].pcie_length.len0 = MAX_SIZE;
448 buffers[0].pcie_length.len1 = MAX_SIZE;
449 buffers[0].pcie_length.len2 = MAX_SIZE;
450 buffers[0].pcie_length.len3 = MAX_SIZE;
451 buffers[1].u64 = address;
452 address += MAX_SIZE;
453 buffers[2].u64 = address;
454 address += MAX_SIZE;
455 buffers[3].u64 = address;
456 address += MAX_SIZE;
457 buffers[4].u64 = address;
458 address += MAX_SIZE;
459 size -= MAX_SIZE*4;
460 buffers += 5;
461 segments+=4;
462 }
463 }
464 return segments;
465 }
466
467
468 /**
469 * Build the first and last pointers based on a DMA engine header
470 * and submit them to the engine. The purpose of this function is
471 * to simplify the building of DMA engine commands by automatically
472 * converting a simple address and size into the apropriate internal
473 * or PCI / PCIe address list. This function does not support gather lists,
474 * so you will need to build your own lists in that case.
475 *
476 * @param engine Engine to submit to (0 to cvmx_dma_engine_get_num()-1)
477 * @param header DMA Command header. Note that the nfst and nlst fields do not
478 * need to be filled in. All other fields must be set properly.
479 * @param first_address
480 * Address to use for the first pointers. In the case of INTERNAL,
481 * INBOUND, and OUTBOUND this is an Octeon memory address. In the
482 * case of EXTERNAL, this is the source PCI / PCIe address.
483 * @param last_address
484 * Address to use for the last pointers. In the case of EXTERNAL,
485 * INBOUND, and OUTBOUND this is a PCI / PCIe address. In the
486 * case of INTERNAL, this is the Octeon memory destination address.
487 * @param size Size of the transfer to perform.
488 *
489 * @return Zero on success, negative on failure
490 */
cvmx_dma_engine_transfer(int engine,cvmx_dma_engine_header_t header,uint64_t first_address,uint64_t last_address,int size)491 int cvmx_dma_engine_transfer(int engine, cvmx_dma_engine_header_t header,
492 uint64_t first_address, uint64_t last_address,
493 int size)
494 {
495 cvmx_dma_engine_buffer_t buffers[32];
496 int words = 0;
497
498 switch (header.s.type)
499 {
500 case CVMX_DMA_ENGINE_TRANSFER_INTERNAL:
501 header.s.nfst = __cvmx_dma_engine_build_internal_pointers(buffers, first_address, size);
502 words += header.s.nfst;
503 header.s.nlst = __cvmx_dma_engine_build_internal_pointers(buffers + words, last_address, size);
504 words += header.s.nlst;
505 break;
506 case CVMX_DMA_ENGINE_TRANSFER_INBOUND:
507 case CVMX_DMA_ENGINE_TRANSFER_OUTBOUND:
508 header.s.nfst = __cvmx_dma_engine_build_internal_pointers(buffers, first_address, size);
509 words += header.s.nfst;
510 header.s.nlst = __cvmx_dma_engine_build_external_pointers(buffers + words, last_address, size);
511 words += header.s.nlst + ((header.s.nlst-1) >> 2) + 1;
512 break;
513 case CVMX_DMA_ENGINE_TRANSFER_EXTERNAL:
514 header.s.nfst = __cvmx_dma_engine_build_external_pointers(buffers, first_address, size);
515 words += header.s.nfst + ((header.s.nfst-1) >> 2) + 1;
516 header.s.nlst = __cvmx_dma_engine_build_external_pointers(buffers + words, last_address, size);
517 words += header.s.nlst + ((header.s.nlst-1) >> 2) + 1;
518 break;
519 }
520 return cvmx_dma_engine_submit(engine, header, words, buffers);
521 }
522
523 #endif
524