1 /**************************************************************************
2
3 Copyright (c) 2007, Chelsio Inc.
4 All rights reserved.
5
6 Redistribution and use in source and binary forms, with or without
7 modification, are permitted provided that the following conditions are met:
8
9 1. Redistributions of source code must retain the above copyright notice,
10 this list of conditions and the following disclaimer.
11
12 2. Neither the name of the Chelsio Corporation nor the names of its
13 contributors may be used to endorse or promote products derived from
14 this software without specific prior written permission.
15
16 THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS"
17 AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
18 IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
19 ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT OWNER OR CONTRIBUTORS BE
20 LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR
21 CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF
22 SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS
23 INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN
24 CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE)
25 ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE
26 POSSIBILITY OF SUCH DAMAGE.
27
28 ***************************************************************************/
29 #include <sys/cdefs.h>
30 __FBSDID("$FreeBSD: stable/9/sys/dev/cxgb/ulp/iw_cxgb/iw_cxgb_hal.c 237920 2012-07-01 12:00:36Z np $");
31
32 #include "opt_inet.h"
33
34 #ifdef TCP_OFFLOAD
35 #include <sys/param.h>
36 #include <sys/systm.h>
37 #include <sys/kernel.h>
38 #include <sys/bus.h>
39 #include <sys/pciio.h>
40 #include <sys/conf.h>
41 #include <machine/bus.h>
42 #include <machine/resource.h>
43 #include <sys/bus_dma.h>
44 #include <sys/rman.h>
45 #include <sys/ioccom.h>
46 #include <sys/mbuf.h>
47 #include <sys/rwlock.h>
48 #include <sys/linker.h>
49 #include <sys/firmware.h>
50 #include <sys/socket.h>
51 #include <sys/socketvar.h>
52 #include <sys/sockopt.h>
53 #include <sys/sockio.h>
54 #include <sys/smp.h>
55 #include <sys/sysctl.h>
56 #include <sys/syslog.h>
57 #include <sys/queue.h>
58 #include <sys/taskqueue.h>
59 #include <sys/proc.h>
60 #include <sys/queue.h>
61
62 #include <vm/vm.h>
63 #include <vm/pmap.h>
64
65 #include <net/route.h>
66 #include <netinet/in_systm.h>
67 #include <netinet/in.h>
68 #include <netinet/in_pcb.h>
69 #include <netinet/ip.h>
70 #include <netinet/ip_var.h>
71 #include <netinet/tcp_var.h>
72 #include <netinet/toecore.h>
73 #include <netinet/tcp.h>
74 #include <netinet/tcpip.h>
75
76 #include <rdma/ib_verbs.h>
77 #include <linux/idr.h>
78 #include <ulp/iw_cxgb/iw_cxgb_ib_intfc.h>
79
80 #include <cxgb_include.h>
81 #include <ulp/tom/cxgb_l2t.h>
82 #include <ulp/tom/cxgb_tom.h>
83 #include <ulp/tom/cxgb_toepcb.h>
84 #include <ulp/iw_cxgb/iw_cxgb_wr.h>
85 #include <ulp/iw_cxgb/iw_cxgb_hal.h>
86 #include <ulp/iw_cxgb/iw_cxgb_provider.h>
87 #include <ulp/iw_cxgb/iw_cxgb_cm.h>
88 #include <ulp/iw_cxgb/iw_cxgb.h>
89 #include <ulp/iw_cxgb/iw_cxgb_resource.h>
90
91 /* Response queue used for RDMA events. */
92 #define ASYNC_NOTIF_RSPQ 0
93 static inline int
cxio_rdma_cq_setup(struct cxio_rdev * rdev_p,unsigned id,uint64_t base_addr,unsigned size,unsigned ovfl_mode,unsigned credits,unsigned credit_thres)94 cxio_rdma_cq_setup(struct cxio_rdev *rdev_p, unsigned id, uint64_t base_addr,
95 unsigned size, unsigned ovfl_mode, unsigned credits, unsigned credit_thres)
96 {
97 struct adapter *sc = rdev_p->adap;
98 int rc;
99
100 mtx_lock_spin(&sc->sge.reg_lock);
101 rc = -t3_sge_init_cqcntxt(sc, id, base_addr, size, ASYNC_NOTIF_RSPQ,
102 ovfl_mode, credits, credit_thres);
103 mtx_unlock_spin(&sc->sge.reg_lock);
104
105 return (rc);
106 }
107
108 int
cxio_hal_cq_op(struct cxio_rdev * rdev_p,struct t3_cq * cq,enum t3_cq_opcode op,u32 credit)109 cxio_hal_cq_op(struct cxio_rdev *rdev_p, struct t3_cq *cq,
110 enum t3_cq_opcode op, u32 credit)
111 {
112 int ret;
113 struct t3_cqe *cqe;
114 u32 rptr;
115 struct adapter *sc = rdev_p->adap;
116
117 if (op != CQ_CREDIT_UPDATE)
118 credit = 0;
119
120 mtx_lock_spin(&sc->sge.reg_lock);
121 ret = t3_sge_cqcntxt_op(sc, cq->cqid, op, credit);
122 mtx_unlock_spin(&sc->sge.reg_lock);
123
124 if ((ret < 0) || (op == CQ_CREDIT_UPDATE))
125 return (ret);
126
127 /*
128 * If the rearm returned an index other than our current index,
129 * then there might be CQE's in flight (being DMA'd). We must wait
130 * here for them to complete or the consumer can miss a notification.
131 */
132 if (Q_PTR2IDX((cq->rptr), cq->size_log2) != ret) {
133 int i=0;
134
135 rptr = cq->rptr;
136
137 /*
138 * Keep the generation correct by bumping rptr until it
139 * matches the index returned by the rearm - 1.
140 */
141 while (Q_PTR2IDX((rptr+1), cq->size_log2) != ret)
142 rptr++;
143
144 /*
145 * Now rptr is the index for the (last) cqe that was
146 * in-flight at the time the HW rearmed the CQ. We
147 * spin until that CQE is valid.
148 */
149 cqe = cq->queue + Q_PTR2IDX(rptr, cq->size_log2);
150 while (!CQ_VLD_ENTRY(rptr, cq->size_log2, cqe)) {
151 DELAY(1);
152 if (i++ > 1000000) {
153 struct adapter *sc = rdev_p->adap;
154
155 log(LOG_ERR, "%s: stalled rnic\n",
156 device_get_nameunit(sc->dev));
157 PANIC_IF(1);
158 return (-EIO);
159 }
160 }
161
162 return (1);
163 }
164
165 return (0);
166 }
167
168 static int
cxio_hal_clear_cq_ctx(struct cxio_rdev * rdev_p,u32 cqid)169 cxio_hal_clear_cq_ctx(struct cxio_rdev *rdev_p, u32 cqid)
170 {
171
172 return (cxio_rdma_cq_setup(rdev_p, cqid, 0, 0, 0, 0, 0));
173 }
174
175 static int
cxio_hal_clear_qp_ctx(struct cxio_rdev * rdev_p,u32 qpid)176 cxio_hal_clear_qp_ctx(struct cxio_rdev *rdev_p, u32 qpid)
177 {
178 u64 sge_cmd;
179 struct t3_modify_qp_wr *wqe;
180 struct mbuf *m;
181
182 m = M_GETHDR_OFLD(0, CPL_PRIORITY_CONTROL, wqe);
183 if (m == NULL) {
184 CTR1(KTR_IW_CXGB, "%s m_gethdr failed", __FUNCTION__);
185 return (-ENOMEM);
186 }
187 wqe = mtod(m, struct t3_modify_qp_wr *);
188 memset(wqe, 0, sizeof(*wqe));
189 build_fw_riwrh((struct fw_riwrh *) wqe, T3_WR_QP_MOD, 3, 0, qpid, 7);
190 wqe->flags = htobe32(MODQP_WRITE_EC);
191 sge_cmd = qpid << 8 | 3;
192 wqe->sge_cmd = htobe64(sge_cmd);
193 return t3_offload_tx(rdev_p->adap, m);
194 }
195
196 int
cxio_create_cq(struct cxio_rdev * rdev_p,struct t3_cq * cq,int kernel)197 cxio_create_cq(struct cxio_rdev *rdev_p, struct t3_cq *cq, int kernel)
198 {
199 int size = (1UL << (cq->size_log2)) * sizeof(struct t3_cqe);
200
201 size += 1; /* one extra page for storing cq-in-err state */
202 cq->cqid = cxio_hal_get_cqid(rdev_p->rscp);
203 if (!cq->cqid)
204 return (-ENOMEM);
205 if (kernel) {
206 cq->sw_queue = malloc(size, M_DEVBUF, M_NOWAIT|M_ZERO);
207 if (!cq->sw_queue)
208 return (-ENOMEM);
209 }
210
211 cq->queue = contigmalloc(size,
212 M_DEVBUF, M_NOWAIT, 0ul, ~0ul, 4096, 0);
213 if (cq->queue)
214 cq->dma_addr = vtophys(cq->queue);
215 else {
216 free(cq->sw_queue, M_DEVBUF);
217 return (-ENOMEM);
218 }
219 memset(cq->queue, 0, size);
220
221 return (cxio_rdma_cq_setup(rdev_p, cq->cqid, cq->dma_addr,
222 1UL << cq->size_log2, 0, 65535, 1));
223 }
224
225 static u32
get_qpid(struct cxio_rdev * rdev_p,struct cxio_ucontext * uctx)226 get_qpid(struct cxio_rdev *rdev_p, struct cxio_ucontext *uctx)
227 {
228 struct cxio_qpid *entry;
229 u32 qpid;
230 int i;
231
232 mtx_lock(&uctx->lock);
233 if (!TAILQ_EMPTY(&uctx->qpids)) {
234
235 entry = TAILQ_FIRST(&uctx->qpids);
236 TAILQ_REMOVE(&uctx->qpids, entry, entry);
237 qpid = entry->qpid;
238 free(entry, M_DEVBUF);
239 } else {
240 qpid = cxio_hal_get_qpid(rdev_p->rscp);
241 if (!qpid)
242 goto out;
243 for (i = qpid+1; i & rdev_p->qpmask; i++) {
244 entry = malloc(sizeof *entry, M_DEVBUF, M_NOWAIT);
245 if (!entry)
246 break;
247 entry->qpid = i;
248 TAILQ_INSERT_TAIL(&uctx->qpids, entry, entry);
249 }
250 }
251 out:
252 mtx_unlock(&uctx->lock);
253 CTR2(KTR_IW_CXGB, "%s qpid 0x%x", __FUNCTION__, qpid);
254 return qpid;
255 }
256
257 static void
put_qpid(struct cxio_rdev * rdev_p,u32 qpid,struct cxio_ucontext * uctx)258 put_qpid(struct cxio_rdev *rdev_p, u32 qpid,
259 struct cxio_ucontext *uctx)
260 {
261 struct cxio_qpid *entry;
262
263 entry = malloc(sizeof *entry, M_DEVBUF, M_NOWAIT);
264 CTR2(KTR_IW_CXGB, "%s qpid 0x%x", __FUNCTION__, qpid);
265 entry->qpid = qpid;
266 mtx_lock(&uctx->lock);
267 TAILQ_INSERT_TAIL(&uctx->qpids, entry, entry);
268 mtx_unlock(&uctx->lock);
269 }
270
271 void
cxio_release_ucontext(struct cxio_rdev * rdev_p,struct cxio_ucontext * uctx)272 cxio_release_ucontext(struct cxio_rdev *rdev_p, struct cxio_ucontext *uctx)
273 {
274 struct cxio_qpid *pos, *tmp;
275
276 mtx_lock(&uctx->lock);
277 TAILQ_FOREACH_SAFE(pos, &uctx->qpids, entry, tmp) {
278 TAILQ_REMOVE(&uctx->qpids, pos, entry);
279 if (!(pos->qpid & rdev_p->qpmask))
280 cxio_hal_put_qpid(rdev_p->rscp, pos->qpid);
281 free(pos, M_DEVBUF);
282 }
283 mtx_unlock(&uctx->lock);
284 }
285
286 void
cxio_init_ucontext(struct cxio_rdev * rdev_p,struct cxio_ucontext * uctx)287 cxio_init_ucontext(struct cxio_rdev *rdev_p, struct cxio_ucontext *uctx)
288 {
289 TAILQ_INIT(&uctx->qpids);
290 mtx_init(&uctx->lock, "cxio uctx", NULL, MTX_DEF|MTX_DUPOK);
291 }
292
293 int
cxio_create_qp(struct cxio_rdev * rdev_p,u32 kernel_domain,struct t3_wq * wq,struct cxio_ucontext * uctx)294 cxio_create_qp(struct cxio_rdev *rdev_p, u32 kernel_domain,
295 struct t3_wq *wq, struct cxio_ucontext *uctx)
296 {
297 int depth = 1UL << wq->size_log2;
298 int rqsize = 1UL << wq->rq_size_log2;
299
300 wq->qpid = get_qpid(rdev_p, uctx);
301 if (!wq->qpid)
302 return (-ENOMEM);
303
304 wq->rq = malloc(depth * sizeof(struct t3_swrq), M_DEVBUF, M_NOWAIT|M_ZERO);
305 if (!wq->rq)
306 goto err1;
307
308 wq->rq_addr = cxio_hal_rqtpool_alloc(rdev_p, rqsize);
309 if (!wq->rq_addr)
310 goto err2;
311
312 wq->sq = malloc(depth * sizeof(struct t3_swsq), M_DEVBUF, M_NOWAIT|M_ZERO);
313 if (!wq->sq)
314 goto err3;
315 wq->queue = contigmalloc(depth *sizeof(union t3_wr),
316 M_DEVBUF, M_NOWAIT, 0ul, ~0ul, 4096, 0);
317 if (wq->queue)
318 wq->dma_addr = vtophys(wq->queue);
319 else
320 goto err4;
321
322 memset(wq->queue, 0, depth * sizeof(union t3_wr));
323 wq->doorbell = rdev_p->rnic_info.kdb_addr;
324 if (!kernel_domain)
325 wq->udb = (u64)rdev_p->rnic_info.udbell_physbase +
326 (wq->qpid << rdev_p->qpshift);
327 wq->rdev = rdev_p;
328 CTR4(KTR_IW_CXGB, "%s qpid 0x%x doorbell 0x%p udb 0x%llx", __FUNCTION__,
329 wq->qpid, wq->doorbell, (unsigned long long) wq->udb);
330 return 0;
331 err4:
332 free(wq->sq, M_DEVBUF);
333 err3:
334 cxio_hal_rqtpool_free(rdev_p, wq->rq_addr, rqsize);
335 err2:
336 free(wq->rq, M_DEVBUF);
337 err1:
338 put_qpid(rdev_p, wq->qpid, uctx);
339 return (-ENOMEM);
340 }
341
342 int
cxio_destroy_cq(struct cxio_rdev * rdev_p,struct t3_cq * cq)343 cxio_destroy_cq(struct cxio_rdev *rdev_p, struct t3_cq *cq)
344 {
345 int err;
346 err = cxio_hal_clear_cq_ctx(rdev_p, cq->cqid);
347 free(cq->sw_queue, M_DEVBUF);
348 #if 0
349 dma_free_coherent(&(rdev_p->rnic_info.pdev),
350 (1UL << (cq->size_log2))
351 * sizeof(struct t3_cqe), cq->queue,
352 /* pci_unmap_addr(cq, mapping)*/ 0);
353 #else
354 contigfree(cq->queue,(1UL << (cq->size_log2))
355 * sizeof(struct t3_cqe), M_DEVBUF);
356 #endif
357 cxio_hal_put_cqid(rdev_p->rscp, cq->cqid);
358 return err;
359 }
360
361 int
cxio_destroy_qp(struct cxio_rdev * rdev_p,struct t3_wq * wq,struct cxio_ucontext * uctx)362 cxio_destroy_qp(struct cxio_rdev *rdev_p, struct t3_wq *wq,
363 struct cxio_ucontext *uctx)
364 {
365
366 #if 0
367 dma_free_coherent(&(rdev_p->rnic_info.pdev),
368 (1UL << (wq->size_log2))
369 * sizeof(union t3_wr), wq->queue,
370 /* pci_unmap_addr(wq, mapping)*/ 0);
371 #else
372 contigfree(wq->queue, (1UL << (wq->size_log2))
373 * sizeof(union t3_wr), M_DEVBUF);
374 #endif
375 free(wq->sq, M_DEVBUF);
376 cxio_hal_rqtpool_free(rdev_p, wq->rq_addr, (1UL << wq->rq_size_log2));
377 free(wq->rq, M_DEVBUF);
378 put_qpid(rdev_p, wq->qpid, uctx);
379 return 0;
380 }
381
382 static void
insert_recv_cqe(struct t3_wq * wq,struct t3_cq * cq)383 insert_recv_cqe(struct t3_wq *wq, struct t3_cq *cq)
384 {
385 struct t3_cqe cqe;
386
387 CTR5(KTR_IW_CXGB, "%s wq %p cq %p sw_rptr 0x%x sw_wptr 0x%x", __FUNCTION__,
388 wq, cq, cq->sw_rptr, cq->sw_wptr);
389 memset(&cqe, 0, sizeof(cqe));
390 cqe.header = htobe32(V_CQE_STATUS(TPT_ERR_SWFLUSH) |
391 V_CQE_OPCODE(T3_SEND) |
392 V_CQE_TYPE(0) |
393 V_CQE_SWCQE(1) |
394 V_CQE_QPID(wq->qpid) |
395 V_CQE_GENBIT(Q_GENBIT(cq->sw_wptr,
396 cq->size_log2)));
397 *(cq->sw_queue + Q_PTR2IDX(cq->sw_wptr, cq->size_log2)) = cqe;
398 cq->sw_wptr++;
399 }
400
401 int
cxio_flush_rq(struct t3_wq * wq,struct t3_cq * cq,int count)402 cxio_flush_rq(struct t3_wq *wq, struct t3_cq *cq, int count)
403 {
404 u32 ptr;
405 int flushed = 0;
406
407 CTR3(KTR_IW_CXGB, "%s wq %p cq %p", __FUNCTION__, wq, cq);
408
409 /* flush RQ */
410 CTR4(KTR_IW_CXGB, "%s rq_rptr %u rq_wptr %u skip count %u", __FUNCTION__,
411 wq->rq_rptr, wq->rq_wptr, count);
412 ptr = wq->rq_rptr + count;
413 while (ptr++ != wq->rq_wptr) {
414 insert_recv_cqe(wq, cq);
415 flushed++;
416 }
417 return flushed;
418 }
419
420 static void
insert_sq_cqe(struct t3_wq * wq,struct t3_cq * cq,struct t3_swsq * sqp)421 insert_sq_cqe(struct t3_wq *wq, struct t3_cq *cq,
422 struct t3_swsq *sqp)
423 {
424 struct t3_cqe cqe;
425
426 CTR5(KTR_IW_CXGB, "%s wq %p cq %p sw_rptr 0x%x sw_wptr 0x%x", __FUNCTION__,
427 wq, cq, cq->sw_rptr, cq->sw_wptr);
428 memset(&cqe, 0, sizeof(cqe));
429 cqe.header = htobe32(V_CQE_STATUS(TPT_ERR_SWFLUSH) |
430 V_CQE_OPCODE(sqp->opcode) |
431 V_CQE_TYPE(1) |
432 V_CQE_SWCQE(1) |
433 V_CQE_QPID(wq->qpid) |
434 V_CQE_GENBIT(Q_GENBIT(cq->sw_wptr,
435 cq->size_log2)));
436 cqe.u.scqe.wrid_hi = sqp->sq_wptr;
437
438 *(cq->sw_queue + Q_PTR2IDX(cq->sw_wptr, cq->size_log2)) = cqe;
439 cq->sw_wptr++;
440 }
441
442 int
cxio_flush_sq(struct t3_wq * wq,struct t3_cq * cq,int count)443 cxio_flush_sq(struct t3_wq *wq, struct t3_cq *cq, int count)
444 {
445 __u32 ptr;
446 int flushed = 0;
447 struct t3_swsq *sqp = wq->sq + Q_PTR2IDX(wq->sq_rptr, wq->sq_size_log2);
448
449 ptr = wq->sq_rptr + count;
450 sqp = wq->sq + Q_PTR2IDX(ptr, wq->sq_size_log2);
451 while (ptr != wq->sq_wptr) {
452 insert_sq_cqe(wq, cq, sqp);
453 ptr++;
454 sqp = wq->sq + Q_PTR2IDX(ptr, wq->sq_size_log2);
455 flushed++;
456 }
457 return flushed;
458 }
459
460 /*
461 * Move all CQEs from the HWCQ into the SWCQ.
462 */
463 void
cxio_flush_hw_cq(struct t3_cq * cq)464 cxio_flush_hw_cq(struct t3_cq *cq)
465 {
466 struct t3_cqe *cqe, *swcqe;
467
468 CTR3(KTR_IW_CXGB, "%s cq %p cqid 0x%x", __FUNCTION__, cq, cq->cqid);
469 cqe = cxio_next_hw_cqe(cq);
470 while (cqe) {
471 CTR3(KTR_IW_CXGB, "%s flushing hwcq rptr 0x%x to swcq wptr 0x%x",
472 __FUNCTION__, cq->rptr, cq->sw_wptr);
473 swcqe = cq->sw_queue + Q_PTR2IDX(cq->sw_wptr, cq->size_log2);
474 *swcqe = *cqe;
475 swcqe->header |= htobe32(V_CQE_SWCQE(1));
476 cq->sw_wptr++;
477 cq->rptr++;
478 cqe = cxio_next_hw_cqe(cq);
479 }
480 }
481
cqe_completes_wr(struct t3_cqe * cqe,struct t3_wq * wq)482 static int cqe_completes_wr(struct t3_cqe *cqe, struct t3_wq *wq)
483 {
484 if (CQE_OPCODE(*cqe) == T3_TERMINATE)
485 return 0;
486
487 if ((CQE_OPCODE(*cqe) == T3_RDMA_WRITE) && RQ_TYPE(*cqe))
488 return 0;
489
490 if ((CQE_OPCODE(*cqe) == T3_READ_RESP) && SQ_TYPE(*cqe))
491 return 0;
492
493 if (CQE_OPCODE(*cqe) && RQ_TYPE(*cqe) &&
494 Q_EMPTY(wq->rq_rptr, wq->rq_wptr))
495 return 0;
496
497 return 1;
498 }
499
500 void
cxio_count_scqes(struct t3_cq * cq,struct t3_wq * wq,int * count)501 cxio_count_scqes(struct t3_cq *cq, struct t3_wq *wq, int *count)
502 {
503 struct t3_cqe *cqe;
504 u32 ptr;
505
506 *count = 0;
507 ptr = cq->sw_rptr;
508 while (!Q_EMPTY(ptr, cq->sw_wptr)) {
509 cqe = cq->sw_queue + (Q_PTR2IDX(ptr, cq->size_log2));
510 if ((SQ_TYPE(*cqe) || (CQE_OPCODE(*cqe) == T3_READ_RESP)) &&
511 (CQE_QPID(*cqe) == wq->qpid))
512 (*count)++;
513 ptr++;
514 }
515 CTR3(KTR_IW_CXGB, "%s cq %p count %d", __FUNCTION__, cq, *count);
516 }
517
518 void
cxio_count_rcqes(struct t3_cq * cq,struct t3_wq * wq,int * count)519 cxio_count_rcqes(struct t3_cq *cq, struct t3_wq *wq, int *count)
520 {
521 struct t3_cqe *cqe;
522 u32 ptr;
523
524 *count = 0;
525 CTR2(KTR_IW_CXGB, "%s count zero %d", __FUNCTION__, *count);
526 ptr = cq->sw_rptr;
527 while (!Q_EMPTY(ptr, cq->sw_wptr)) {
528 cqe = cq->sw_queue + (Q_PTR2IDX(ptr, cq->size_log2));
529 if (RQ_TYPE(*cqe) && (CQE_OPCODE(*cqe) != T3_READ_RESP) &&
530 (CQE_QPID(*cqe) == wq->qpid) && cqe_completes_wr(cqe, wq))
531 (*count)++;
532 ptr++;
533 }
534 CTR3(KTR_IW_CXGB, "%s cq %p count %d", __FUNCTION__, cq, *count);
535 }
536
/*
 * Program the context of the control CQ (id 0): base address 0, size 1,
 * overflow mode 1, no credits and no credit threshold.
 * Returns whatever cxio_rdma_cq_setup() returns.
 */
static int
cxio_hal_init_ctrl_cq(struct cxio_rdev *rdev_p)
{
	int rc;

	rc = cxio_rdma_cq_setup(rdev_p, 0, 0, 1, 1, 0, 0);
	return (rc);
}
543
544 static int
cxio_hal_init_ctrl_qp(struct cxio_rdev * rdev_p)545 cxio_hal_init_ctrl_qp(struct cxio_rdev *rdev_p)
546 {
547 int err;
548 u64 sge_cmd, ctx0, ctx1;
549 u64 base_addr;
550 struct t3_modify_qp_wr *wqe;
551 struct mbuf *m;
552
553 m = M_GETHDR_OFLD(0, CPL_PRIORITY_CONTROL, wqe);
554 if (m == NULL) {
555 CTR1(KTR_IW_CXGB, "%s m_gethdr failed", __FUNCTION__);
556 return (ENOMEM);
557 }
558 err = cxio_hal_init_ctrl_cq(rdev_p);
559 if (err) {
560 CTR2(KTR_IW_CXGB, "%s err %d initializing ctrl_cq", __FUNCTION__, err);
561 goto err;
562 }
563
564 rdev_p->ctrl_qp.workq = contigmalloc((1 << T3_CTRL_QP_SIZE_LOG2)
565 *sizeof(union t3_wr), M_DEVBUF, M_NOWAIT, 0ul, ~0ul, 4096, 0);
566 if (rdev_p->ctrl_qp.workq)
567 rdev_p->ctrl_qp.dma_addr = vtophys(rdev_p->ctrl_qp.workq);
568 else {
569 CTR1(KTR_IW_CXGB, "%s dma_alloc_coherent failed", __FUNCTION__);
570 err = ENOMEM;
571 goto err;
572 }
573
574 rdev_p->ctrl_qp.doorbell = rdev_p->rnic_info.kdb_addr;
575 memset(rdev_p->ctrl_qp.workq, 0,
576 (1 << T3_CTRL_QP_SIZE_LOG2) * sizeof(union t3_wr));
577
578 mtx_init(&rdev_p->ctrl_qp.lock, "ctl-qp lock", NULL, MTX_DEF|MTX_DUPOK);
579
580 /* update HW Ctrl QP context */
581 base_addr = rdev_p->ctrl_qp.dma_addr;
582 base_addr >>= 12;
583 ctx0 = (V_EC_SIZE((1 << T3_CTRL_QP_SIZE_LOG2)) |
584 V_EC_BASE_LO((u32) base_addr & 0xffff));
585 ctx0 <<= 32;
586 ctx0 |= V_EC_CREDITS(FW_WR_NUM);
587 base_addr >>= 16;
588 ctx1 = (u32) base_addr;
589 base_addr >>= 32;
590 ctx1 |= ((u64) (V_EC_BASE_HI((u32) base_addr & 0xf) | V_EC_RESPQ(0) |
591 V_EC_TYPE(0) | V_EC_GEN(1) |
592 V_EC_UP_TOKEN(T3_CTL_QP_TID) | F_EC_VALID)) << 32;
593 memset(wqe, 0, sizeof(*wqe));
594 build_fw_riwrh((struct fw_riwrh *) wqe, T3_WR_QP_MOD, 0, 0,
595 T3_CTL_QP_TID, 7);
596 wqe->flags = htobe32(MODQP_WRITE_EC);
597 sge_cmd = (3ULL << 56) | FW_RI_SGEEC_START << 8 | 3;
598 wqe->sge_cmd = htobe64(sge_cmd);
599 wqe->ctx1 = htobe64(ctx1);
600 wqe->ctx0 = htobe64(ctx0);
601 CTR3(KTR_IW_CXGB, "CtrlQP dma_addr 0x%llx workq %p size %d",
602 (unsigned long long) rdev_p->ctrl_qp.dma_addr,
603 rdev_p->ctrl_qp.workq, 1 << T3_CTRL_QP_SIZE_LOG2);
604 return t3_offload_tx(rdev_p->adap, m);
605 err:
606 m_freem(m);
607 return err;
608 }
609
610 static int
cxio_hal_destroy_ctrl_qp(struct cxio_rdev * rdev_p)611 cxio_hal_destroy_ctrl_qp(struct cxio_rdev *rdev_p)
612 {
613 #if 0
614
615 dma_free_coherent(&(rdev_p->rnic_info.pdev),
616 (1UL << T3_CTRL_QP_SIZE_LOG2)
617 * sizeof(union t3_wr), rdev_p->ctrl_qp.workq,
618 /* pci_unmap_addr(&rdev_p->ctrl_qp, mapping)*/ 0);
619 #else
620 contigfree(rdev_p->ctrl_qp.workq,(1UL << T3_CTRL_QP_SIZE_LOG2)
621 * sizeof(union t3_wr), M_DEVBUF);
622 #endif
623 return cxio_hal_clear_qp_ctx(rdev_p, T3_CTRL_QP_ID);
624 }
625
626 /* write len bytes of data into addr (32B aligned address)
627 * If data is NULL, clear len byte of memory to zero.
628 * caller aquires the ctrl_qp lock before the call
629 */
630 static int
cxio_hal_ctrl_qp_write_mem(struct cxio_rdev * rdev_p,u32 addr,u32 len,void * data)631 cxio_hal_ctrl_qp_write_mem(struct cxio_rdev *rdev_p, u32 addr,
632 u32 len, void *data)
633 {
634 u32 i, nr_wqe, copy_len;
635 u8 *copy_data;
636 u8 wr_len, utx_len; /* lenght in 8 byte flit */
637 enum t3_wr_flags flag;
638 __be64 *wqe;
639 u64 utx_cmd;
640 addr &= 0x7FFFFFF;
641 nr_wqe = len % 96 ? len / 96 + 1 : len / 96; /* 96B max per WQE */
642 CTR6(KTR_IW_CXGB, "cxio_hal_ctrl_qp_write_mem wptr 0x%x rptr 0x%x len %d, nr_wqe %d data %p addr 0x%0x",
643 rdev_p->ctrl_qp.wptr, rdev_p->ctrl_qp.rptr, len,
644 nr_wqe, data, addr);
645 utx_len = 3; /* in 32B unit */
646 for (i = 0; i < nr_wqe; i++) {
647 if (Q_FULL(rdev_p->ctrl_qp.rptr, rdev_p->ctrl_qp.wptr,
648 T3_CTRL_QP_SIZE_LOG2)) {
649 CTR4(KTR_IW_CXGB, "%s ctrl_qp full wtpr 0x%0x rptr 0x%0x, "
650 "wait for more space i %d", __FUNCTION__,
651 rdev_p->ctrl_qp.wptr, rdev_p->ctrl_qp.rptr, i);
652 if (cxio_wait(&rdev_p->ctrl_qp,
653 &rdev_p->ctrl_qp.lock,
654 !Q_FULL(rdev_p->ctrl_qp.rptr,
655 rdev_p->ctrl_qp.wptr,
656 T3_CTRL_QP_SIZE_LOG2))) {
657 CTR1(KTR_IW_CXGB, "%s ctrl_qp workq interrupted",
658 __FUNCTION__);
659 return (-ERESTART);
660 }
661 CTR2(KTR_IW_CXGB, "%s ctrl_qp wakeup, continue posting work request "
662 "i %d", __FUNCTION__, i);
663 }
664 wqe = (__be64 *)(rdev_p->ctrl_qp.workq + (rdev_p->ctrl_qp.wptr %
665 (1 << T3_CTRL_QP_SIZE_LOG2)));
666 flag = 0;
667 if (i == (nr_wqe - 1)) {
668 /* last WQE */
669 flag = T3_COMPLETION_FLAG;
670 if (len % 32)
671 utx_len = len / 32 + 1;
672 else
673 utx_len = len / 32;
674 }
675
676 /*
677 * Force a CQE to return the credit to the workq in case
678 * we posted more than half the max QP size of WRs
679 */
680 if ((i != 0) &&
681 (i % (((1 << T3_CTRL_QP_SIZE_LOG2)) >> 1) == 0)) {
682 flag = T3_COMPLETION_FLAG;
683 CTR2(KTR_IW_CXGB, "%s force completion at i %d", __FUNCTION__, i);
684 }
685
686 /* build the utx mem command */
687 wqe += (sizeof(struct t3_bypass_wr) >> 3);
688 utx_cmd = (T3_UTX_MEM_WRITE << 28) | (addr + i * 3);
689 utx_cmd <<= 32;
690 utx_cmd |= (utx_len << 28) | ((utx_len << 2) + 1);
691 *wqe = htobe64(utx_cmd);
692 wqe++;
693 copy_data = (u8 *) data + i * 96;
694 copy_len = len > 96 ? 96 : len;
695
696 /* clear memory content if data is NULL */
697 if (data)
698 memcpy(wqe, copy_data, copy_len);
699 else
700 memset(wqe, 0, copy_len);
701 if (copy_len % 32)
702 memset(((u8 *) wqe) + copy_len, 0,
703 32 - (copy_len % 32));
704 wr_len = ((sizeof(struct t3_bypass_wr)) >> 3) + 1 +
705 (utx_len << 2);
706 wqe = (__be64 *)(rdev_p->ctrl_qp.workq + (rdev_p->ctrl_qp.wptr %
707 (1 << T3_CTRL_QP_SIZE_LOG2)));
708
709 /* wptr in the WRID[31:0] */
710 ((union t3_wrid *)(wqe+1))->id0.low = rdev_p->ctrl_qp.wptr;
711
712 /*
713 * This must be the last write with a memory barrier
714 * for the genbit
715 */
716 build_fw_riwrh((struct fw_riwrh *) wqe, T3_WR_BP, flag,
717 Q_GENBIT(rdev_p->ctrl_qp.wptr,
718 T3_CTRL_QP_SIZE_LOG2), T3_CTRL_QP_ID,
719 wr_len);
720 if (flag == T3_COMPLETION_FLAG)
721 ring_doorbell(rdev_p->ctrl_qp.doorbell, T3_CTRL_QP_ID);
722
723 len -= 96;
724 rdev_p->ctrl_qp.wptr++;
725 }
726 return 0;
727 }
728
729 /* IN: stag key, pdid, perm, zbva, to, len, page_size, pbl, and pbl_size
730 * OUT: stag index, actual pbl_size, pbl_addr allocated.
731 * TBD: shared memory region support
732 */
733 static int
__cxio_tpt_op(struct cxio_rdev * rdev_p,u32 reset_tpt_entry,u32 * stag,u8 stag_state,u32 pdid,enum tpt_mem_type type,enum tpt_mem_perm perm,u32 zbva,u64 to,u32 len,u8 page_size,u32 pbl_size,u32 pbl_addr)734 __cxio_tpt_op(struct cxio_rdev *rdev_p, u32 reset_tpt_entry,
735 u32 *stag, u8 stag_state, u32 pdid,
736 enum tpt_mem_type type, enum tpt_mem_perm perm,
737 u32 zbva, u64 to, u32 len, u8 page_size,
738 u32 pbl_size, u32 pbl_addr)
739 {
740 int err;
741 struct tpt_entry tpt;
742 u32 stag_idx;
743 u32 wptr;
744
745 stag_state = stag_state > 0;
746 stag_idx = (*stag) >> 8;
747
748 if ((!reset_tpt_entry) && !(*stag != T3_STAG_UNSET)) {
749 stag_idx = cxio_hal_get_stag(rdev_p->rscp);
750 if (!stag_idx)
751 return (-ENOMEM);
752 *stag = (stag_idx << 8) | ((*stag) & 0xFF);
753 }
754 CTR5(KTR_IW_CXGB, "%s stag_state 0x%0x type 0x%0x pdid 0x%0x, stag_idx 0x%x",
755 __FUNCTION__, stag_state, type, pdid, stag_idx);
756
757 mtx_lock(&rdev_p->ctrl_qp.lock);
758
759 /* write TPT entry */
760 if (reset_tpt_entry)
761 memset(&tpt, 0, sizeof(tpt));
762 else {
763 tpt.valid_stag_pdid = htobe32(F_TPT_VALID |
764 V_TPT_STAG_KEY((*stag) & M_TPT_STAG_KEY) |
765 V_TPT_STAG_STATE(stag_state) |
766 V_TPT_STAG_TYPE(type) | V_TPT_PDID(pdid));
767 PANIC_IF(page_size >= 28);
768 tpt.flags_pagesize_qpid = htobe32(V_TPT_PERM(perm) |
769 F_TPT_MW_BIND_ENABLE |
770 V_TPT_ADDR_TYPE((zbva ? TPT_ZBTO : TPT_VATO)) |
771 V_TPT_PAGE_SIZE(page_size));
772 tpt.rsvd_pbl_addr = reset_tpt_entry ? 0 :
773 htobe32(V_TPT_PBL_ADDR(PBL_OFF(rdev_p, pbl_addr)>>3));
774 tpt.len = htobe32(len);
775 tpt.va_hi = htobe32((u32) (to >> 32));
776 tpt.va_low_or_fbo = htobe32((u32) (to & 0xFFFFFFFFULL));
777 tpt.rsvd_bind_cnt_or_pstag = 0;
778 tpt.rsvd_pbl_size = reset_tpt_entry ? 0 :
779 htobe32(V_TPT_PBL_SIZE((pbl_size) >> 2));
780 }
781 err = cxio_hal_ctrl_qp_write_mem(rdev_p,
782 stag_idx +
783 (rdev_p->rnic_info.tpt_base >> 5),
784 sizeof(tpt), &tpt);
785
786 /* release the stag index to free pool */
787 if (reset_tpt_entry)
788 cxio_hal_put_stag(rdev_p->rscp, stag_idx);
789
790 wptr = rdev_p->ctrl_qp.wptr;
791 mtx_unlock(&rdev_p->ctrl_qp.lock);
792 if (!err)
793 if (cxio_wait(&rdev_p->ctrl_qp,
794 &rdev_p->ctrl_qp.lock,
795 SEQ32_GE(rdev_p->ctrl_qp.rptr, wptr)))
796 return (-ERESTART);
797 return err;
798 }
799
/*
 * Write `pbl_size` page-list entries (8 bytes each) from `pbl` into adapter
 * memory at `pbl_addr` through the control QP, then wait until the control
 * QP read pointer has caught up with the write pointer observed under the
 * lock.
 *
 * Returns 0 on success, the cxio_hal_ctrl_qp_write_mem() error if queuing
 * fails, or ERESTART if the final wait is interrupted.
 */
int
cxio_write_pbl(struct cxio_rdev *rdev_p, __be64 *pbl,
    u32 pbl_addr, u32 pbl_size)
{
	u32 posted_wptr;
	int rc;

	CTR4(KTR_IW_CXGB, "%s *pdb_addr 0x%x, pbl_base 0x%x, pbl_size %d",
	    __func__, pbl_addr, rdev_p->rnic_info.pbl_base,
	    pbl_size);

	mtx_lock(&rdev_p->ctrl_qp.lock);
	rc = cxio_hal_ctrl_qp_write_mem(rdev_p, pbl_addr >> 5, pbl_size << 3,
	    pbl);
	posted_wptr = rdev_p->ctrl_qp.wptr;
	mtx_unlock(&rdev_p->ctrl_qp.lock);

	if (rc != 0)
		return (rc);

	if (cxio_wait(&rdev_p->ctrl_qp,
	    &rdev_p->ctrl_qp.lock,
	    SEQ32_GE(rdev_p->ctrl_qp.rptr, posted_wptr)))
		return (ERESTART);

	return (0);
}
825
826 int
cxio_register_phys_mem(struct cxio_rdev * rdev_p,u32 * stag,u32 pdid,enum tpt_mem_perm perm,u32 zbva,u64 to,u32 len,u8 page_size,u32 pbl_size,u32 pbl_addr)827 cxio_register_phys_mem(struct cxio_rdev *rdev_p, u32 *stag, u32 pdid,
828 enum tpt_mem_perm perm, u32 zbva, u64 to, u32 len,
829 u8 page_size, u32 pbl_size, u32 pbl_addr)
830 {
831 *stag = T3_STAG_UNSET;
832 return __cxio_tpt_op(rdev_p, 0, stag, 1, pdid, TPT_NON_SHARED_MR, perm,
833 zbva, to, len, page_size, pbl_size, pbl_addr);
834 }
835
836 int
cxio_reregister_phys_mem(struct cxio_rdev * rdev_p,u32 * stag,u32 pdid,enum tpt_mem_perm perm,u32 zbva,u64 to,u32 len,u8 page_size,u32 pbl_size,u32 pbl_addr)837 cxio_reregister_phys_mem(struct cxio_rdev *rdev_p, u32 *stag, u32 pdid,
838 enum tpt_mem_perm perm, u32 zbva, u64 to, u32 len,
839 u8 page_size, u32 pbl_size, u32 pbl_addr)
840 {
841 return __cxio_tpt_op(rdev_p, 0, stag, 1, pdid, TPT_NON_SHARED_MR, perm,
842 zbva, to, len, page_size, pbl_size, pbl_addr);
843 }
844
845 int
cxio_dereg_mem(struct cxio_rdev * rdev_p,u32 stag,u32 pbl_size,u32 pbl_addr)846 cxio_dereg_mem(struct cxio_rdev *rdev_p, u32 stag, u32 pbl_size,
847 u32 pbl_addr)
848 {
849 return __cxio_tpt_op(rdev_p, 1, &stag, 0, 0, 0, 0, 0, 0ULL, 0, 0,
850 pbl_size, pbl_addr);
851 }
852
853 int
cxio_allocate_window(struct cxio_rdev * rdev_p,u32 * stag,u32 pdid)854 cxio_allocate_window(struct cxio_rdev *rdev_p, u32 * stag, u32 pdid)
855 {
856 *stag = T3_STAG_UNSET;
857 return __cxio_tpt_op(rdev_p, 0, stag, 0, pdid, TPT_MW, 0, 0, 0ULL, 0, 0,
858 0, 0);
859 }
860
861 int
cxio_deallocate_window(struct cxio_rdev * rdev_p,u32 stag)862 cxio_deallocate_window(struct cxio_rdev *rdev_p, u32 stag)
863 {
864 return __cxio_tpt_op(rdev_p, 1, &stag, 0, 0, 0, 0, 0, 0ULL, 0, 0,
865 0, 0);
866 }
867
868 int
cxio_rdma_init(struct cxio_rdev * rdev_p,struct t3_rdma_init_attr * attr,struct socket * so)869 cxio_rdma_init(struct cxio_rdev *rdev_p, struct t3_rdma_init_attr *attr,
870 struct socket *so)
871 {
872 struct t3_rdma_init_wr *wqe;
873 struct mbuf *m;
874 struct ofld_hdr *oh;
875 int rc;
876 struct tcpcb *tp;
877 struct inpcb *inp;
878 struct toepcb *toep;
879
880 m = M_GETHDR_OFLD(0, CPL_PRIORITY_DATA, wqe);
881 if (m == NULL)
882 return (-ENOMEM);
883 CTR2(KTR_IW_CXGB, "%s rdev_p %p", __FUNCTION__, rdev_p);
884 wqe->wrh.op_seop_flags = htobe32(V_FW_RIWR_OP(T3_WR_INIT));
885 wqe->wrh.gen_tid_len = htobe32(V_FW_RIWR_TID(attr->tid) |
886 V_FW_RIWR_LEN(sizeof(*wqe) >> 3));
887 wqe->wrid.id1 = 0;
888 wqe->qpid = htobe32(attr->qpid);
889 wqe->pdid = htobe32(attr->pdid);
890 wqe->scqid = htobe32(attr->scqid);
891 wqe->rcqid = htobe32(attr->rcqid);
892 wqe->rq_addr = htobe32(attr->rq_addr - rdev_p->rnic_info.rqt_base);
893 wqe->rq_size = htobe32(attr->rq_size);
894 wqe->mpaattrs = attr->mpaattrs;
895 wqe->qpcaps = attr->qpcaps;
896 wqe->ulpdu_size = htobe16(attr->tcp_emss);
897 wqe->rqe_count = htobe16(attr->rqe_count);
898 wqe->flags_rtr_type = htobe16(attr->flags |
899 V_RTR_TYPE(attr->rtr_type) |
900 V_CHAN(attr->chan));
901 wqe->ord = htobe32(attr->ord);
902 wqe->ird = htobe32(attr->ird);
903 wqe->qp_dma_addr = htobe64(attr->qp_dma_addr);
904 wqe->qp_dma_size = htobe32(attr->qp_dma_size);
905 wqe->irs = htobe32(attr->irs);
906
907 /* XXX: bad form, fix later */
908 inp = sotoinpcb(so);
909 INP_WLOCK(inp);
910 tp = intotcpcb(inp);
911 toep = tp->t_toe;
912 oh = mtod(m, struct ofld_hdr *);
913 oh->plen = 0;
914 oh->flags |= F_HDR_DF;
915 enqueue_wr(toep, m);
916 toep->tp_wr_avail--;
917 toep->tp_wr_unacked++;
918 rc = t3_offload_tx(rdev_p->adap, m);
919 INP_WUNLOCK(inp);
920
921 return (rc);
922 }
923
924 static int
cxio_hal_ev_handler(struct sge_qset * qs,struct rsp_desc * r,struct mbuf * m)925 cxio_hal_ev_handler(struct sge_qset *qs, struct rsp_desc *r, struct mbuf *m)
926 {
927 struct adapter *sc = qs->adap;
928 struct iwch_dev *rnicp = sc->iwarp_softc;
929 struct cxio_rdev *rdev_p = &rnicp->rdev;
930 struct respQ_msg_t *rsp_msg = (struct respQ_msg_t *) m->m_data;
931 int qpid = CQE_QPID(rsp_msg->cqe);
932
933 CTR6(KTR_IW_CXGB, "%s cq_id 0x%x cq_ptr 0x%x genbit %0x overflow %0x an %0x",
934 __FUNCTION__, RSPQ_CQID(rsp_msg), RSPQ_CQPTR(rsp_msg),
935 RSPQ_GENBIT(rsp_msg), RSPQ_OVERFLOW(rsp_msg), RSPQ_AN(rsp_msg));
936 CTR4(KTR_IW_CXGB, "se %0x notify %0x cqbranch %0x creditth %0x",
937 RSPQ_SE(rsp_msg), RSPQ_NOTIFY(rsp_msg), RSPQ_CQBRANCH(rsp_msg),
938 RSPQ_CREDIT_THRESH(rsp_msg));
939 CTR4(KTR_IW_CXGB, "CQE: QPID 0x%0x type 0x%0x status 0x%0x opcode %d",
940 qpid, CQE_TYPE(rsp_msg->cqe), CQE_STATUS(rsp_msg->cqe),
941 CQE_OPCODE(rsp_msg->cqe));
942 CTR3(KTR_IW_CXGB, "len 0x%0x wrid_hi_stag 0x%x wrid_low_msn 0x%x",
943 CQE_LEN(rsp_msg->cqe), CQE_WRID_HI(rsp_msg->cqe), CQE_WRID_LOW(rsp_msg->cqe));
944
945 switch(qpid) {
946 case T3_CTRL_QP_ID:
947 mtx_lock(&rdev_p->ctrl_qp.lock);
948 rdev_p->ctrl_qp.rptr = CQE_WRID_LOW(rsp_msg->cqe) + 1;
949 wakeup(&rdev_p->ctrl_qp);
950 mtx_unlock(&rdev_p->ctrl_qp.lock);
951 break;
952 case 0xfff8:
953 break;
954 default:
955 iwch_ev_dispatch(rnicp, m);
956 }
957
958 m_freem(m);
959 return (0);
960 }
961
962 /* Caller takes care of locking if needed */
963 int
cxio_rdev_open(struct cxio_rdev * rdev_p)964 cxio_rdev_open(struct cxio_rdev *rdev_p)
965 {
966 int err = 0;
967 struct rdma_info *ri = &rdev_p->rnic_info;
968 struct adapter *sc = rdev_p->adap;
969
970 KASSERT(rdev_p->adap, ("%s: adap is NULL", __func__));
971
972 memset(&rdev_p->ctrl_qp, 0, sizeof(rdev_p->ctrl_qp));
973
974 ri->udbell_physbase = rman_get_start(sc->udbs_res);
975 ri->udbell_len = rman_get_size(sc->udbs_res);
976 ri->tpt_base = t3_read_reg(sc, A_ULPTX_TPT_LLIMIT);
977 ri->tpt_top = t3_read_reg(sc, A_ULPTX_TPT_ULIMIT);
978 ri->pbl_base = t3_read_reg(sc, A_ULPTX_PBL_LLIMIT);
979 ri->pbl_top = t3_read_reg(sc, A_ULPTX_PBL_ULIMIT);
980 ri->rqt_base = t3_read_reg(sc, A_ULPRX_RQ_LLIMIT);
981 ri->rqt_top = t3_read_reg(sc, A_ULPRX_RQ_ULIMIT);
982 ri->kdb_addr = (void *)((unsigned long)
983 rman_get_virtual(sc->regs_res) + A_SG_KDOORBELL);
984
985 /*
986 * qpshift is the number of bits to shift the qpid left in order
987 * to get the correct address of the doorbell for that qp.
988 */
989 cxio_init_ucontext(rdev_p, &rdev_p->uctx);
990 rdev_p->qpshift = PAGE_SHIFT -
991 ilog2(65536 >>
992 ilog2(rdev_p->rnic_info.udbell_len >>
993 PAGE_SHIFT));
994 rdev_p->qpnr = rdev_p->rnic_info.udbell_len >> PAGE_SHIFT;
995 rdev_p->qpmask = (65536 >> ilog2(rdev_p->qpnr)) - 1;
996 CTR4(KTR_IW_CXGB, "cxio_rdev_open rnic %p info: tpt_base 0x%0x tpt_top 0x%0x num stags %d",
997 rdev_p->adap, rdev_p->rnic_info.tpt_base,
998 rdev_p->rnic_info.tpt_top, cxio_num_stags(rdev_p));
999 CTR4(KTR_IW_CXGB, "pbl_base 0x%0x pbl_top 0x%0x rqt_base 0x%0x, rqt_top 0x%0x",
1000 rdev_p->rnic_info.pbl_base,
1001 rdev_p->rnic_info.pbl_top, rdev_p->rnic_info.rqt_base,
1002 rdev_p->rnic_info.rqt_top);
1003 CTR6(KTR_IW_CXGB, "udbell_len 0x%0x udbell_physbase 0x%lx kdb_addr %p qpshift %lu "
1004 "qpnr %d qpmask 0x%x",
1005 rdev_p->rnic_info.udbell_len,
1006 rdev_p->rnic_info.udbell_physbase, rdev_p->rnic_info.kdb_addr,
1007 rdev_p->qpshift, rdev_p->qpnr, rdev_p->qpmask);
1008
1009 err = cxio_hal_init_ctrl_qp(rdev_p);
1010 if (err) {
1011 log(LOG_ERR, "%s error %d initializing ctrl_qp.\n",
1012 __FUNCTION__, err);
1013 goto err1;
1014 }
1015 err = cxio_hal_init_resource(rdev_p, cxio_num_stags(rdev_p), 0,
1016 0, T3_MAX_NUM_QP, T3_MAX_NUM_CQ,
1017 T3_MAX_NUM_PD);
1018 if (err) {
1019 log(LOG_ERR, "%s error %d initializing hal resources.\n",
1020 __FUNCTION__, err);
1021 goto err2;
1022 }
1023 err = cxio_hal_pblpool_create(rdev_p);
1024 if (err) {
1025 log(LOG_ERR, "%s error %d initializing pbl mem pool.\n",
1026 __FUNCTION__, err);
1027 goto err3;
1028 }
1029 err = cxio_hal_rqtpool_create(rdev_p);
1030 if (err) {
1031 log(LOG_ERR, "%s error %d initializing rqt mem pool.\n",
1032 __FUNCTION__, err);
1033 goto err4;
1034 }
1035 return 0;
1036 err4:
1037 cxio_hal_pblpool_destroy(rdev_p);
1038 err3:
1039 cxio_hal_destroy_resource(rdev_p->rscp);
1040 err2:
1041 cxio_hal_destroy_ctrl_qp(rdev_p);
1042 err1:
1043 return err;
1044 }
1045
1046 void
cxio_rdev_close(struct cxio_rdev * rdev_p)1047 cxio_rdev_close(struct cxio_rdev *rdev_p)
1048 {
1049 cxio_hal_pblpool_destroy(rdev_p);
1050 cxio_hal_rqtpool_destroy(rdev_p);
1051 cxio_hal_destroy_ctrl_qp(rdev_p);
1052 cxio_hal_destroy_resource(rdev_p->rscp);
1053 }
1054
1055 int
cxio_hal_init(struct adapter * sc)1056 cxio_hal_init(struct adapter *sc)
1057 {
1058 #ifdef needed
1059 if (cxio_hal_init_rhdl_resource(T3_MAX_NUM_RI))
1060 return (ENOMEM);
1061 #endif
1062 t3_register_cpl_handler(sc, CPL_ASYNC_NOTIF, cxio_hal_ev_handler);
1063
1064 return (0);
1065 }
1066
1067 void
cxio_hal_uninit(struct adapter * sc)1068 cxio_hal_uninit(struct adapter *sc)
1069 {
1070 t3_register_cpl_handler(sc, CPL_ASYNC_NOTIF, NULL);
1071 #ifdef needed
1072 cxio_hal_destroy_rhdl_resource();
1073 #endif
1074 }
1075
1076 static void
flush_completed_wrs(struct t3_wq * wq,struct t3_cq * cq)1077 flush_completed_wrs(struct t3_wq *wq, struct t3_cq *cq)
1078 {
1079 struct t3_swsq *sqp;
1080 __u32 ptr = wq->sq_rptr;
1081 int count = Q_COUNT(wq->sq_rptr, wq->sq_wptr);
1082
1083 sqp = wq->sq + Q_PTR2IDX(ptr, wq->sq_size_log2);
1084 while (count--)
1085 if (!sqp->signaled) {
1086 ptr++;
1087 sqp = wq->sq + Q_PTR2IDX(ptr, wq->sq_size_log2);
1088 } else if (sqp->complete) {
1089
1090 /*
1091 * Insert this completed cqe into the swcq.
1092 */
1093 CTR3(KTR_IW_CXGB, "%s moving cqe into swcq sq idx %ld cq idx %ld",
1094 __FUNCTION__, Q_PTR2IDX(ptr, wq->sq_size_log2),
1095 Q_PTR2IDX(cq->sw_wptr, cq->size_log2));
1096 sqp->cqe.header |= htonl(V_CQE_SWCQE(1));
1097 *(cq->sw_queue + Q_PTR2IDX(cq->sw_wptr, cq->size_log2))
1098 = sqp->cqe;
1099 cq->sw_wptr++;
1100 sqp->signaled = 0;
1101 break;
1102 } else
1103 break;
1104 }
1105
1106 static void
create_read_req_cqe(struct t3_wq * wq,struct t3_cqe * hw_cqe,struct t3_cqe * read_cqe)1107 create_read_req_cqe(struct t3_wq *wq, struct t3_cqe *hw_cqe,
1108 struct t3_cqe *read_cqe)
1109 {
1110 read_cqe->u.scqe.wrid_hi = wq->oldest_read->sq_wptr;
1111 read_cqe->len = wq->oldest_read->read_len;
1112 read_cqe->header = htonl(V_CQE_QPID(CQE_QPID(*hw_cqe)) |
1113 V_CQE_SWCQE(SW_CQE(*hw_cqe)) |
1114 V_CQE_OPCODE(T3_READ_REQ) |
1115 V_CQE_TYPE(1));
1116 }
1117
1118 /*
1119 * Return a ptr to the next read wr in the SWSQ or NULL.
1120 */
1121 static void
advance_oldest_read(struct t3_wq * wq)1122 advance_oldest_read(struct t3_wq *wq)
1123 {
1124
1125 u32 rptr = wq->oldest_read - wq->sq + 1;
1126 u32 wptr = Q_PTR2IDX(wq->sq_wptr, wq->sq_size_log2);
1127
1128 while (Q_PTR2IDX(rptr, wq->sq_size_log2) != wptr) {
1129 wq->oldest_read = wq->sq + Q_PTR2IDX(rptr, wq->sq_size_log2);
1130
1131 if (wq->oldest_read->opcode == T3_READ_REQ)
1132 return;
1133 rptr++;
1134 }
1135 wq->oldest_read = NULL;
1136 }
1137
1138 /*
1139 * cxio_poll_cq
1140 *
1141 * Caller must:
1142 * check the validity of the first CQE,
1143 * supply the wq assicated with the qpid.
1144 *
1145 * credit: cq credit to return to sge.
1146 * cqe_flushed: 1 iff the CQE is flushed.
1147 * cqe: copy of the polled CQE.
1148 *
1149 * return value:
1150 * 0 CQE returned,
1151 * -1 CQE skipped, try again.
1152 */
1153 int
cxio_poll_cq(struct t3_wq * wq,struct t3_cq * cq,struct t3_cqe * cqe,u8 * cqe_flushed,u64 * cookie,u32 * credit)1154 cxio_poll_cq(struct t3_wq *wq, struct t3_cq *cq, struct t3_cqe *cqe,
1155 u8 *cqe_flushed, u64 *cookie, u32 *credit)
1156 {
1157 int ret = 0;
1158 struct t3_cqe *hw_cqe, read_cqe;
1159
1160 *cqe_flushed = 0;
1161 *credit = 0;
1162 hw_cqe = cxio_next_cqe(cq);
1163
1164 CTR5(KTR_IW_CXGB, "cxio_poll_cq CQE OOO %d qpid 0x%0x genbit %d type %d status 0x%0x",
1165 CQE_OOO(*hw_cqe), CQE_QPID(*hw_cqe),
1166 CQE_GENBIT(*hw_cqe), CQE_TYPE(*hw_cqe), CQE_STATUS(*hw_cqe));
1167 CTR4(KTR_IW_CXGB, "opcode 0x%0x len 0x%0x wrid_hi_stag 0x%x wrid_low_msn 0x%x",
1168 CQE_OPCODE(*hw_cqe), CQE_LEN(*hw_cqe), CQE_WRID_HI(*hw_cqe),
1169 CQE_WRID_LOW(*hw_cqe));
1170
1171 /*
1172 * skip cqe's not affiliated with a QP.
1173 */
1174 if (wq == NULL) {
1175 ret = -1;
1176 goto skip_cqe;
1177 }
1178
1179 /*
1180 * Gotta tweak READ completions:
1181 * 1) the cqe doesn't contain the sq_wptr from the wr.
1182 * 2) opcode not reflected from the wr.
1183 * 3) read_len not reflected from the wr.
1184 * 4) cq_type is RQ_TYPE not SQ_TYPE.
1185 */
1186 if (RQ_TYPE(*hw_cqe) && (CQE_OPCODE(*hw_cqe) == T3_READ_RESP)) {
1187
1188 /*
1189 * Don't write to the HWCQ, so create a new read req CQE
1190 * in local memory.
1191 */
1192 create_read_req_cqe(wq, hw_cqe, &read_cqe);
1193 hw_cqe = &read_cqe;
1194 advance_oldest_read(wq);
1195 }
1196
1197 /*
1198 * T3A: Discard TERMINATE CQEs.
1199 */
1200 if (CQE_OPCODE(*hw_cqe) == T3_TERMINATE) {
1201 ret = -1;
1202 wq->error = 1;
1203 goto skip_cqe;
1204 }
1205
1206 if (CQE_STATUS(*hw_cqe) || wq->error) {
1207 *cqe_flushed = wq->error;
1208 wq->error = 1;
1209
1210 /*
1211 * T3A inserts errors into the CQE. We cannot return
1212 * these as work completions.
1213 */
1214 /* incoming write failures */
1215 if ((CQE_OPCODE(*hw_cqe) == T3_RDMA_WRITE)
1216 && RQ_TYPE(*hw_cqe)) {
1217 ret = -1;
1218 goto skip_cqe;
1219 }
1220 /* incoming read request failures */
1221 if ((CQE_OPCODE(*hw_cqe) == T3_READ_RESP) && SQ_TYPE(*hw_cqe)) {
1222 ret = -1;
1223 goto skip_cqe;
1224 }
1225
1226 /* incoming SEND with no receive posted failures */
1227 if (CQE_OPCODE(*hw_cqe) && RQ_TYPE(*hw_cqe) &&
1228 Q_EMPTY(wq->rq_rptr, wq->rq_wptr)) {
1229 ret = -1;
1230 goto skip_cqe;
1231 }
1232 PANIC_IF((*cqe_flushed == 0) && !SW_CQE(*hw_cqe));
1233 goto proc_cqe;
1234 }
1235
1236 /*
1237 * RECV completion.
1238 */
1239 if (RQ_TYPE(*hw_cqe)) {
1240
1241 /*
1242 * HW only validates 4 bits of MSN. So we must validate that
1243 * the MSN in the SEND is the next expected MSN. If its not,
1244 * then we complete this with TPT_ERR_MSN and mark the wq in
1245 * error.
1246 */
1247
1248 if (Q_EMPTY(wq->rq_rptr, wq->rq_wptr)) {
1249 wq->error = 1;
1250 ret = -1;
1251 goto skip_cqe;
1252 }
1253
1254 if (__predict_false((CQE_WRID_MSN(*hw_cqe) != (wq->rq_rptr + 1)))) {
1255 wq->error = 1;
1256 hw_cqe->header |= htonl(V_CQE_STATUS(TPT_ERR_MSN));
1257 goto proc_cqe;
1258 }
1259 goto proc_cqe;
1260 }
1261
1262 /*
1263 * If we get here its a send completion.
1264 *
1265 * Handle out of order completion. These get stuffed
1266 * in the SW SQ. Then the SW SQ is walked to move any
1267 * now in-order completions into the SW CQ. This handles
1268 * 2 cases:
1269 * 1) reaping unsignaled WRs when the first subsequent
1270 * signaled WR is completed.
1271 * 2) out of order read completions.
1272 */
1273 if (!SW_CQE(*hw_cqe) && (CQE_WRID_SQ_WPTR(*hw_cqe) != wq->sq_rptr)) {
1274 struct t3_swsq *sqp;
1275
1276 CTR2(KTR_IW_CXGB, "%s out of order completion going in swsq at idx %ld",
1277 __FUNCTION__,
1278 Q_PTR2IDX(CQE_WRID_SQ_WPTR(*hw_cqe), wq->sq_size_log2));
1279 sqp = wq->sq +
1280 Q_PTR2IDX(CQE_WRID_SQ_WPTR(*hw_cqe), wq->sq_size_log2);
1281 sqp->cqe = *hw_cqe;
1282 sqp->complete = 1;
1283 ret = -1;
1284 goto flush_wq;
1285 }
1286
1287 proc_cqe:
1288 *cqe = *hw_cqe;
1289
1290 /*
1291 * Reap the associated WR(s) that are freed up with this
1292 * completion.
1293 */
1294 if (SQ_TYPE(*hw_cqe)) {
1295 wq->sq_rptr = CQE_WRID_SQ_WPTR(*hw_cqe);
1296 CTR2(KTR_IW_CXGB, "%s completing sq idx %ld", __FUNCTION__,
1297 Q_PTR2IDX(wq->sq_rptr, wq->sq_size_log2));
1298 *cookie = wq->sq[Q_PTR2IDX(wq->sq_rptr, wq->sq_size_log2)].wr_id;
1299 wq->sq_rptr++;
1300 } else {
1301 CTR2(KTR_IW_CXGB, "%s completing rq idx %ld", __FUNCTION__,
1302 Q_PTR2IDX(wq->rq_rptr, wq->rq_size_log2));
1303 *cookie = wq->rq[Q_PTR2IDX(wq->rq_rptr, wq->rq_size_log2)].wr_id;
1304 if (wq->rq[Q_PTR2IDX(wq->rq_rptr, wq->rq_size_log2)].pbl_addr)
1305 cxio_hal_pblpool_free(wq->rdev,
1306 wq->rq[Q_PTR2IDX(wq->rq_rptr,
1307 wq->rq_size_log2)].pbl_addr, T3_STAG0_PBL_SIZE);
1308 PANIC_IF(Q_EMPTY(wq->rq_rptr, wq->rq_wptr));
1309 wq->rq_rptr++;
1310 }
1311
1312 flush_wq:
1313 /*
1314 * Flush any completed cqes that are now in-order.
1315 */
1316 flush_completed_wrs(wq, cq);
1317
1318 skip_cqe:
1319 if (SW_CQE(*hw_cqe)) {
1320 CTR4(KTR_IW_CXGB, "%s cq %p cqid 0x%x skip sw cqe sw_rptr 0x%x",
1321 __FUNCTION__, cq, cq->cqid, cq->sw_rptr);
1322 ++cq->sw_rptr;
1323 } else {
1324 CTR4(KTR_IW_CXGB, "%s cq %p cqid 0x%x skip hw cqe rptr 0x%x",
1325 __FUNCTION__, cq, cq->cqid, cq->rptr);
1326 ++cq->rptr;
1327
1328 /*
1329 * T3A: compute credits.
1330 */
1331 if (((cq->rptr - cq->wptr) > (1 << (cq->size_log2 - 1)))
1332 || ((cq->rptr - cq->wptr) >= 128)) {
1333 *credit = cq->rptr - cq->wptr;
1334 cq->wptr = cq->rptr;
1335 }
1336 }
1337 return ret;
1338 }
1339 #endif
1340