1 /*
2 * Copyright (c) 2005 Topspin Communications. All rights reserved.
3 * Copyright (c) 2005 Mellanox Technologies Ltd. All rights reserved.
4 *
5 * This software is available to you under a choice of one of two
6 * licenses. You may choose to be licensed under the terms of the GNU
7 * General Public License (GPL) Version 2, available from the file
8 * COPYING in the main directory of this source tree, or the
9 * OpenIB.org BSD license below:
10 *
11 * Redistribution and use in source and binary forms, with or
12 * without modification, are permitted provided that the following
13 * conditions are met:
14 *
15 * - Redistributions of source code must retain the above
16 * copyright notice, this list of conditions and the following
17 * disclaimer.
18 *
19 * - Redistributions in binary form must reproduce the above
20 * copyright notice, this list of conditions and the following
21 * disclaimer in the documentation and/or other materials
22 * provided with the distribution.
23 *
24 * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND,
25 * EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF
26 * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND
27 * NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS
28 * BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN
29 * ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN
30 * CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
31 * SOFTWARE.
32 */
33
34 #if HAVE_CONFIG_H
35 # include <config.h>
36 #endif /* HAVE_CONFIG_H */
37
38 #include <stdlib.h>
39 #include <netinet/in.h>
40 #include <pthread.h>
41 #include <string.h>
42
43 #include "mthca.h"
44 #include "doorbell.h"
45 #include "wqe.h"
46
/*
 * Bit OR'ed into the first word of a send doorbell when the first work
 * request of the batch was posted with IBV_SEND_FENCE.
 */
enum {
	MTHCA_SEND_DOORBELL_FENCE = (1 << 5)
};
50
51 static const uint8_t mthca_opcode[] = {
52 [IBV_WR_SEND] = MTHCA_OPCODE_SEND,
53 [IBV_WR_SEND_WITH_IMM] = MTHCA_OPCODE_SEND_IMM,
54 [IBV_WR_RDMA_WRITE] = MTHCA_OPCODE_RDMA_WRITE,
55 [IBV_WR_RDMA_WRITE_WITH_IMM] = MTHCA_OPCODE_RDMA_WRITE_IMM,
56 [IBV_WR_RDMA_READ] = MTHCA_OPCODE_RDMA_READ,
57 [IBV_WR_ATOMIC_CMP_AND_SWP] = MTHCA_OPCODE_ATOMIC_CS,
58 [IBV_WR_ATOMIC_FETCH_AND_ADD] = MTHCA_OPCODE_ATOMIC_FA,
59 };
60
get_recv_wqe(struct mthca_qp * qp,int n)61 static void *get_recv_wqe(struct mthca_qp *qp, int n)
62 {
63 return qp->buf.buf + (n << qp->rq.wqe_shift);
64 }
65
get_send_wqe(struct mthca_qp * qp,int n)66 static void *get_send_wqe(struct mthca_qp *qp, int n)
67 {
68 return qp->buf.buf + qp->send_wqe_offset + (n << qp->sq.wqe_shift);
69 }
70
mthca_init_qp_indices(struct mthca_qp * qp)71 void mthca_init_qp_indices(struct mthca_qp *qp)
72 {
73 qp->sq.next_ind = 0;
74 qp->sq.last_comp = qp->sq.max - 1;
75 qp->sq.head = 0;
76 qp->sq.tail = 0;
77 qp->sq.last = get_send_wqe(qp, qp->sq.max - 1);
78
79 qp->rq.next_ind = 0;
80 qp->rq.last_comp = qp->rq.max - 1;
81 qp->rq.head = 0;
82 qp->rq.tail = 0;
83 qp->rq.last = get_recv_wqe(qp, qp->rq.max - 1);
84 }
85
wq_overflow(struct mthca_wq * wq,int nreq,struct mthca_cq * cq)86 static inline int wq_overflow(struct mthca_wq *wq, int nreq, struct mthca_cq *cq)
87 {
88 unsigned cur;
89
90 cur = wq->head - wq->tail;
91 if (cur + nreq < wq->max)
92 return 0;
93
94 pthread_spin_lock(&cq->lock);
95 cur = wq->head - wq->tail;
96 pthread_spin_unlock(&cq->lock);
97
98 return cur + nreq >= wq->max;
99 }
100
mthca_tavor_post_send(struct ibv_qp * ibqp,struct ibv_send_wr * wr,struct ibv_send_wr ** bad_wr)101 int mthca_tavor_post_send(struct ibv_qp *ibqp, struct ibv_send_wr *wr,
102 struct ibv_send_wr **bad_wr)
103 {
104 struct mthca_qp *qp = to_mqp(ibqp);
105 void *wqe, *prev_wqe;
106 int ind;
107 int nreq;
108 int ret = 0;
109 int size;
110 int size0 = 0;
111 int i;
112 /*
113 * f0 and op0 cannot be used unless nreq > 0, which means this
114 * function makes it through the loop at least once. So the
115 * code inside the if (!size0) will be executed, and f0 and
116 * op0 will be initialized. So any gcc warning about "may be
117 * used unitialized" is bogus.
118 */
119 uint32_t f0;
120 uint32_t op0;
121
122 pthread_spin_lock(&qp->sq.lock);
123
124 ind = qp->sq.next_ind;
125
126 for (nreq = 0; wr; ++nreq, wr = wr->next) {
127 if (wq_overflow(&qp->sq, nreq, to_mcq(qp->ibv_qp.send_cq))) {
128 ret = -1;
129 *bad_wr = wr;
130 goto out;
131 }
132
133 wqe = get_send_wqe(qp, ind);
134 prev_wqe = qp->sq.last;
135 qp->sq.last = wqe;
136
137 ((struct mthca_next_seg *) wqe)->nda_op = 0;
138 ((struct mthca_next_seg *) wqe)->ee_nds = 0;
139 ((struct mthca_next_seg *) wqe)->flags =
140 ((wr->send_flags & IBV_SEND_SIGNALED) ?
141 htonl(MTHCA_NEXT_CQ_UPDATE) : 0) |
142 ((wr->send_flags & IBV_SEND_SOLICITED) ?
143 htonl(MTHCA_NEXT_SOLICIT) : 0) |
144 htonl(1);
145 if (wr->opcode == IBV_WR_SEND_WITH_IMM ||
146 wr->opcode == IBV_WR_RDMA_WRITE_WITH_IMM)
147 ((struct mthca_next_seg *) wqe)->imm = wr->imm_data;
148
149 wqe += sizeof (struct mthca_next_seg);
150 size = sizeof (struct mthca_next_seg) / 16;
151
152 switch (ibqp->qp_type) {
153 case IBV_QPT_RC:
154 switch (wr->opcode) {
155 case IBV_WR_ATOMIC_CMP_AND_SWP:
156 case IBV_WR_ATOMIC_FETCH_AND_ADD:
157 ((struct mthca_raddr_seg *) wqe)->raddr =
158 htonll(wr->wr.atomic.remote_addr);
159 ((struct mthca_raddr_seg *) wqe)->rkey =
160 htonl(wr->wr.atomic.rkey);
161 ((struct mthca_raddr_seg *) wqe)->reserved = 0;
162
163 wqe += sizeof (struct mthca_raddr_seg);
164
165 if (wr->opcode == IBV_WR_ATOMIC_CMP_AND_SWP) {
166 ((struct mthca_atomic_seg *) wqe)->swap_add =
167 htonll(wr->wr.atomic.swap);
168 ((struct mthca_atomic_seg *) wqe)->compare =
169 htonll(wr->wr.atomic.compare_add);
170 } else {
171 ((struct mthca_atomic_seg *) wqe)->swap_add =
172 htonll(wr->wr.atomic.compare_add);
173 ((struct mthca_atomic_seg *) wqe)->compare = 0;
174 }
175
176 wqe += sizeof (struct mthca_atomic_seg);
177 size += (sizeof (struct mthca_raddr_seg) +
178 sizeof (struct mthca_atomic_seg)) / 16;
179 break;
180
181 case IBV_WR_RDMA_WRITE:
182 case IBV_WR_RDMA_WRITE_WITH_IMM:
183 case IBV_WR_RDMA_READ:
184 ((struct mthca_raddr_seg *) wqe)->raddr =
185 htonll(wr->wr.rdma.remote_addr);
186 ((struct mthca_raddr_seg *) wqe)->rkey =
187 htonl(wr->wr.rdma.rkey);
188 ((struct mthca_raddr_seg *) wqe)->reserved = 0;
189 wqe += sizeof (struct mthca_raddr_seg);
190 size += sizeof (struct mthca_raddr_seg) / 16;
191 break;
192
193 default:
194 /* No extra segments required for sends */
195 break;
196 }
197
198 break;
199
200 case IBV_QPT_UC:
201 switch (wr->opcode) {
202 case IBV_WR_RDMA_WRITE:
203 case IBV_WR_RDMA_WRITE_WITH_IMM:
204 ((struct mthca_raddr_seg *) wqe)->raddr =
205 htonll(wr->wr.rdma.remote_addr);
206 ((struct mthca_raddr_seg *) wqe)->rkey =
207 htonl(wr->wr.rdma.rkey);
208 ((struct mthca_raddr_seg *) wqe)->reserved = 0;
209 wqe += sizeof (struct mthca_raddr_seg);
210 size += sizeof (struct mthca_raddr_seg) / 16;
211 break;
212
213 default:
214 /* No extra segments required for sends */
215 break;
216 }
217
218 break;
219
220 case IBV_QPT_UD:
221 ((struct mthca_tavor_ud_seg *) wqe)->lkey =
222 htonl(to_mah(wr->wr.ud.ah)->key);
223 ((struct mthca_tavor_ud_seg *) wqe)->av_addr =
224 htonll((uintptr_t) to_mah(wr->wr.ud.ah)->av);
225 ((struct mthca_tavor_ud_seg *) wqe)->dqpn =
226 htonl(wr->wr.ud.remote_qpn);
227 ((struct mthca_tavor_ud_seg *) wqe)->qkey =
228 htonl(wr->wr.ud.remote_qkey);
229
230 wqe += sizeof (struct mthca_tavor_ud_seg);
231 size += sizeof (struct mthca_tavor_ud_seg) / 16;
232 break;
233
234 default:
235 break;
236 }
237
238 if (wr->num_sge > qp->sq.max_gs) {
239 ret = -1;
240 *bad_wr = wr;
241 goto out;
242 }
243
244 if (wr->send_flags & IBV_SEND_INLINE) {
245 if (wr->num_sge) {
246 struct mthca_inline_seg *seg = wqe;
247 int s = 0;
248
249 wqe += sizeof *seg;
250 for (i = 0; i < wr->num_sge; ++i) {
251 struct ibv_sge *sge = &wr->sg_list[i];
252
253 s += sge->length;
254
255 if (s > qp->max_inline_data) {
256 ret = -1;
257 *bad_wr = wr;
258 goto out;
259 }
260
261 memcpy(wqe, (void *) (intptr_t) sge->addr,
262 sge->length);
263 wqe += sge->length;
264 }
265
266 seg->byte_count = htonl(MTHCA_INLINE_SEG | s);
267 size += align(s + sizeof *seg, 16) / 16;
268 }
269 } else {
270 struct mthca_data_seg *seg;
271
272 for (i = 0; i < wr->num_sge; ++i) {
273 seg = wqe;
274 seg->byte_count = htonl(wr->sg_list[i].length);
275 seg->lkey = htonl(wr->sg_list[i].lkey);
276 seg->addr = htonll(wr->sg_list[i].addr);
277 wqe += sizeof *seg;
278 }
279
280 size += wr->num_sge * (sizeof *seg / 16);
281 }
282
283 qp->wrid[ind + qp->rq.max] = wr->wr_id;
284
285 if (wr->opcode >= sizeof mthca_opcode / sizeof mthca_opcode[0]) {
286 ret = -1;
287 *bad_wr = wr;
288 goto out;
289 }
290
291 ((struct mthca_next_seg *) prev_wqe)->nda_op =
292 htonl(((ind << qp->sq.wqe_shift) +
293 qp->send_wqe_offset) |
294 mthca_opcode[wr->opcode]);
295 /*
296 * Make sure that nda_op is written before setting ee_nds.
297 */
298 wmb();
299 ((struct mthca_next_seg *) prev_wqe)->ee_nds =
300 htonl((size0 ? 0 : MTHCA_NEXT_DBD) | size |
301 ((wr->send_flags & IBV_SEND_FENCE) ?
302 MTHCA_NEXT_FENCE : 0));
303
304 if (!size0) {
305 size0 = size;
306 op0 = mthca_opcode[wr->opcode];
307 f0 = wr->send_flags & IBV_SEND_FENCE ?
308 MTHCA_SEND_DOORBELL_FENCE : 0;
309 }
310
311 ++ind;
312 if (ind >= qp->sq.max)
313 ind -= qp->sq.max;
314 }
315
316 out:
317 if (nreq) {
318 uint32_t doorbell[2];
319
320 doorbell[0] = htonl(((qp->sq.next_ind << qp->sq.wqe_shift) +
321 qp->send_wqe_offset) | f0 | op0);
322 doorbell[1] = htonl((ibqp->qp_num << 8) | size0);
323
324 mthca_write64(doorbell, to_mctx(ibqp->context), MTHCA_SEND_DOORBELL);
325 }
326
327 qp->sq.next_ind = ind;
328 qp->sq.head += nreq;
329
330 pthread_spin_unlock(&qp->sq.lock);
331 return ret;
332 }
333
mthca_tavor_post_recv(struct ibv_qp * ibqp,struct ibv_recv_wr * wr,struct ibv_recv_wr ** bad_wr)334 int mthca_tavor_post_recv(struct ibv_qp *ibqp, struct ibv_recv_wr *wr,
335 struct ibv_recv_wr **bad_wr)
336 {
337 struct mthca_qp *qp = to_mqp(ibqp);
338 uint32_t doorbell[2];
339 int ret = 0;
340 int nreq;
341 int i;
342 int size;
343 int size0 = 0;
344 int ind;
345 void *wqe;
346 void *prev_wqe;
347
348 pthread_spin_lock(&qp->rq.lock);
349
350 ind = qp->rq.next_ind;
351
352 for (nreq = 0; wr; wr = wr->next) {
353 if (wq_overflow(&qp->rq, nreq, to_mcq(qp->ibv_qp.recv_cq))) {
354 ret = -1;
355 *bad_wr = wr;
356 goto out;
357 }
358
359 wqe = get_recv_wqe(qp, ind);
360 prev_wqe = qp->rq.last;
361 qp->rq.last = wqe;
362
363 ((struct mthca_next_seg *) wqe)->ee_nds =
364 htonl(MTHCA_NEXT_DBD);
365 ((struct mthca_next_seg *) wqe)->flags =
366 htonl(MTHCA_NEXT_CQ_UPDATE);
367
368 wqe += sizeof (struct mthca_next_seg);
369 size = sizeof (struct mthca_next_seg) / 16;
370
371 if (wr->num_sge > qp->rq.max_gs) {
372 ret = -1;
373 *bad_wr = wr;
374 goto out;
375 }
376
377 for (i = 0; i < wr->num_sge; ++i) {
378 ((struct mthca_data_seg *) wqe)->byte_count =
379 htonl(wr->sg_list[i].length);
380 ((struct mthca_data_seg *) wqe)->lkey =
381 htonl(wr->sg_list[i].lkey);
382 ((struct mthca_data_seg *) wqe)->addr =
383 htonll(wr->sg_list[i].addr);
384 wqe += sizeof (struct mthca_data_seg);
385 size += sizeof (struct mthca_data_seg) / 16;
386 }
387
388 qp->wrid[ind] = wr->wr_id;
389
390 ((struct mthca_next_seg *) prev_wqe)->ee_nds =
391 htonl(MTHCA_NEXT_DBD | size);
392
393 if (!size0)
394 size0 = size;
395
396 ++ind;
397 if (ind >= qp->rq.max)
398 ind -= qp->rq.max;
399
400 ++nreq;
401 if (nreq == MTHCA_TAVOR_MAX_WQES_PER_RECV_DB) {
402 nreq = 0;
403
404 doorbell[0] = htonl((qp->rq.next_ind << qp->rq.wqe_shift) | size0);
405 doorbell[1] = htonl(ibqp->qp_num << 8);
406
407 /*
408 * Make sure that descriptors are written
409 * before doorbell is rung.
410 */
411 wmb();
412
413 mthca_write64(doorbell, to_mctx(ibqp->context), MTHCA_RECV_DOORBELL);
414
415 qp->rq.next_ind = ind;
416 qp->rq.head += MTHCA_TAVOR_MAX_WQES_PER_RECV_DB;
417 size0 = 0;
418 }
419 }
420
421 out:
422 if (nreq) {
423 doorbell[0] = htonl((qp->rq.next_ind << qp->rq.wqe_shift) | size0);
424 doorbell[1] = htonl((ibqp->qp_num << 8) | nreq);
425
426 /*
427 * Make sure that descriptors are written before
428 * doorbell is rung.
429 */
430 wmb();
431
432 mthca_write64(doorbell, to_mctx(ibqp->context), MTHCA_RECV_DOORBELL);
433 }
434
435 qp->rq.next_ind = ind;
436 qp->rq.head += nreq;
437
438 pthread_spin_unlock(&qp->rq.lock);
439 return ret;
440 }
441
mthca_arbel_post_send(struct ibv_qp * ibqp,struct ibv_send_wr * wr,struct ibv_send_wr ** bad_wr)442 int mthca_arbel_post_send(struct ibv_qp *ibqp, struct ibv_send_wr *wr,
443 struct ibv_send_wr **bad_wr)
444 {
445 struct mthca_qp *qp = to_mqp(ibqp);
446 uint32_t doorbell[2];
447 void *wqe, *prev_wqe;
448 int ind;
449 int nreq;
450 int ret = 0;
451 int size;
452 int size0 = 0;
453 int i;
454 /*
455 * f0 and op0 cannot be used unless nreq > 0, which means this
456 * function makes it through the loop at least once. So the
457 * code inside the if (!size0) will be executed, and f0 and
458 * op0 will be initialized. So any gcc warning about "may be
459 * used unitialized" is bogus.
460 */
461 uint32_t f0;
462 uint32_t op0;
463
464 pthread_spin_lock(&qp->sq.lock);
465
466 /* XXX check that state is OK to post send */
467
468 ind = qp->sq.head & (qp->sq.max - 1);
469
470 for (nreq = 0; wr; ++nreq, wr = wr->next) {
471 if (nreq == MTHCA_ARBEL_MAX_WQES_PER_SEND_DB) {
472 nreq = 0;
473
474 doorbell[0] = htonl((MTHCA_ARBEL_MAX_WQES_PER_SEND_DB << 24) |
475 ((qp->sq.head & 0xffff) << 8) | f0 | op0);
476 doorbell[1] = htonl((ibqp->qp_num << 8) | size0);
477
478 qp->sq.head += MTHCA_ARBEL_MAX_WQES_PER_SEND_DB;
479
480 /*
481 * Make sure that descriptors are written before
482 * doorbell record.
483 */
484 wmb();
485 *qp->sq.db = htonl(qp->sq.head & 0xffff);
486
487 /*
488 * Make sure doorbell record is written before we
489 * write MMIO send doorbell.
490 */
491 wmb();
492 mthca_write64(doorbell, to_mctx(ibqp->context), MTHCA_SEND_DOORBELL);
493
494 size0 = 0;
495 }
496
497 if (wq_overflow(&qp->sq, nreq, to_mcq(qp->ibv_qp.send_cq))) {
498 ret = -1;
499 *bad_wr = wr;
500 goto out;
501 }
502
503 wqe = get_send_wqe(qp, ind);
504 prev_wqe = qp->sq.last;
505 qp->sq.last = wqe;
506
507 ((struct mthca_next_seg *) wqe)->flags =
508 ((wr->send_flags & IBV_SEND_SIGNALED) ?
509 htonl(MTHCA_NEXT_CQ_UPDATE) : 0) |
510 ((wr->send_flags & IBV_SEND_SOLICITED) ?
511 htonl(MTHCA_NEXT_SOLICIT) : 0) |
512 htonl(1);
513 if (wr->opcode == IBV_WR_SEND_WITH_IMM ||
514 wr->opcode == IBV_WR_RDMA_WRITE_WITH_IMM)
515 ((struct mthca_next_seg *) wqe)->imm = wr->imm_data;
516
517 wqe += sizeof (struct mthca_next_seg);
518 size = sizeof (struct mthca_next_seg) / 16;
519
520 switch (ibqp->qp_type) {
521 case IBV_QPT_RC:
522 switch (wr->opcode) {
523 case IBV_WR_ATOMIC_CMP_AND_SWP:
524 case IBV_WR_ATOMIC_FETCH_AND_ADD:
525 ((struct mthca_raddr_seg *) wqe)->raddr =
526 htonll(wr->wr.atomic.remote_addr);
527 ((struct mthca_raddr_seg *) wqe)->rkey =
528 htonl(wr->wr.atomic.rkey);
529 ((struct mthca_raddr_seg *) wqe)->reserved = 0;
530
531 wqe += sizeof (struct mthca_raddr_seg);
532
533 if (wr->opcode == IBV_WR_ATOMIC_CMP_AND_SWP) {
534 ((struct mthca_atomic_seg *) wqe)->swap_add =
535 htonll(wr->wr.atomic.swap);
536 ((struct mthca_atomic_seg *) wqe)->compare =
537 htonll(wr->wr.atomic.compare_add);
538 } else {
539 ((struct mthca_atomic_seg *) wqe)->swap_add =
540 htonll(wr->wr.atomic.compare_add);
541 ((struct mthca_atomic_seg *) wqe)->compare = 0;
542 }
543
544 wqe += sizeof (struct mthca_atomic_seg);
545 size += (sizeof (struct mthca_raddr_seg) +
546 sizeof (struct mthca_atomic_seg)) / 16;
547 break;
548
549 case IBV_WR_RDMA_WRITE:
550 case IBV_WR_RDMA_WRITE_WITH_IMM:
551 case IBV_WR_RDMA_READ:
552 ((struct mthca_raddr_seg *) wqe)->raddr =
553 htonll(wr->wr.rdma.remote_addr);
554 ((struct mthca_raddr_seg *) wqe)->rkey =
555 htonl(wr->wr.rdma.rkey);
556 ((struct mthca_raddr_seg *) wqe)->reserved = 0;
557 wqe += sizeof (struct mthca_raddr_seg);
558 size += sizeof (struct mthca_raddr_seg) / 16;
559 break;
560
561 default:
562 /* No extra segments required for sends */
563 break;
564 }
565
566 break;
567
568 case IBV_QPT_UC:
569 switch (wr->opcode) {
570 case IBV_WR_RDMA_WRITE:
571 case IBV_WR_RDMA_WRITE_WITH_IMM:
572 ((struct mthca_raddr_seg *) wqe)->raddr =
573 htonll(wr->wr.rdma.remote_addr);
574 ((struct mthca_raddr_seg *) wqe)->rkey =
575 htonl(wr->wr.rdma.rkey);
576 ((struct mthca_raddr_seg *) wqe)->reserved = 0;
577 wqe += sizeof (struct mthca_raddr_seg);
578 size += sizeof (struct mthca_raddr_seg) / 16;
579 break;
580
581 default:
582 /* No extra segments required for sends */
583 break;
584 }
585
586 break;
587
588 case IBV_QPT_UD:
589 memcpy(((struct mthca_arbel_ud_seg *) wqe)->av,
590 to_mah(wr->wr.ud.ah)->av, sizeof (struct mthca_av));
591 ((struct mthca_arbel_ud_seg *) wqe)->dqpn =
592 htonl(wr->wr.ud.remote_qpn);
593 ((struct mthca_arbel_ud_seg *) wqe)->qkey =
594 htonl(wr->wr.ud.remote_qkey);
595
596 wqe += sizeof (struct mthca_arbel_ud_seg);
597 size += sizeof (struct mthca_arbel_ud_seg) / 16;
598 break;
599
600 default:
601 break;
602 }
603
604 if (wr->num_sge > qp->sq.max_gs) {
605 ret = -1;
606 *bad_wr = wr;
607 goto out;
608 }
609
610 if (wr->send_flags & IBV_SEND_INLINE) {
611 if (wr->num_sge) {
612 struct mthca_inline_seg *seg = wqe;
613 int s = 0;
614
615 wqe += sizeof *seg;
616 for (i = 0; i < wr->num_sge; ++i) {
617 struct ibv_sge *sge = &wr->sg_list[i];
618
619 s += sge->length;
620
621 if (s > qp->max_inline_data) {
622 ret = -1;
623 *bad_wr = wr;
624 goto out;
625 }
626
627 memcpy(wqe, (void *) (uintptr_t) sge->addr,
628 sge->length);
629 wqe += sge->length;
630 }
631
632 seg->byte_count = htonl(MTHCA_INLINE_SEG | s);
633 size += align(s + sizeof *seg, 16) / 16;
634 }
635 } else {
636 struct mthca_data_seg *seg;
637
638 for (i = 0; i < wr->num_sge; ++i) {
639 seg = wqe;
640 seg->byte_count = htonl(wr->sg_list[i].length);
641 seg->lkey = htonl(wr->sg_list[i].lkey);
642 seg->addr = htonll(wr->sg_list[i].addr);
643 wqe += sizeof *seg;
644 }
645
646 size += wr->num_sge * (sizeof *seg / 16);
647 }
648
649 qp->wrid[ind + qp->rq.max] = wr->wr_id;
650
651 if (wr->opcode >= sizeof mthca_opcode / sizeof mthca_opcode[0]) {
652 ret = -1;
653 *bad_wr = wr;
654 goto out;
655 }
656
657 ((struct mthca_next_seg *) prev_wqe)->nda_op =
658 htonl(((ind << qp->sq.wqe_shift) +
659 qp->send_wqe_offset) |
660 mthca_opcode[wr->opcode]);
661 wmb();
662 ((struct mthca_next_seg *) prev_wqe)->ee_nds =
663 htonl(MTHCA_NEXT_DBD | size |
664 ((wr->send_flags & IBV_SEND_FENCE) ?
665 MTHCA_NEXT_FENCE : 0));
666
667 if (!size0) {
668 size0 = size;
669 op0 = mthca_opcode[wr->opcode];
670 f0 = wr->send_flags & IBV_SEND_FENCE ?
671 MTHCA_SEND_DOORBELL_FENCE : 0;
672 }
673
674 ++ind;
675 if (ind >= qp->sq.max)
676 ind -= qp->sq.max;
677 }
678
679 out:
680 if (nreq) {
681 doorbell[0] = htonl((nreq << 24) |
682 ((qp->sq.head & 0xffff) << 8) |
683 f0 | op0);
684 doorbell[1] = htonl((ibqp->qp_num << 8) | size0);
685
686 qp->sq.head += nreq;
687
688 /*
689 * Make sure that descriptors are written before
690 * doorbell record.
691 */
692 wmb();
693 *qp->sq.db = htonl(qp->sq.head & 0xffff);
694
695 /*
696 * Make sure doorbell record is written before we
697 * write MMIO send doorbell.
698 */
699 wmb();
700 mthca_write64(doorbell, to_mctx(ibqp->context), MTHCA_SEND_DOORBELL);
701 }
702
703 pthread_spin_unlock(&qp->sq.lock);
704 return ret;
705 }
706
mthca_arbel_post_recv(struct ibv_qp * ibqp,struct ibv_recv_wr * wr,struct ibv_recv_wr ** bad_wr)707 int mthca_arbel_post_recv(struct ibv_qp *ibqp, struct ibv_recv_wr *wr,
708 struct ibv_recv_wr **bad_wr)
709 {
710 struct mthca_qp *qp = to_mqp(ibqp);
711 int ret = 0;
712 int nreq;
713 int ind;
714 int i;
715 void *wqe;
716
717 pthread_spin_lock(&qp->rq.lock);
718
719 /* XXX check that state is OK to post receive */
720
721 ind = qp->rq.head & (qp->rq.max - 1);
722
723 for (nreq = 0; wr; ++nreq, wr = wr->next) {
724 if (wq_overflow(&qp->rq, nreq, to_mcq(qp->ibv_qp.recv_cq))) {
725 ret = -1;
726 *bad_wr = wr;
727 goto out;
728 }
729
730 wqe = get_recv_wqe(qp, ind);
731
732 ((struct mthca_next_seg *) wqe)->flags = 0;
733
734 wqe += sizeof (struct mthca_next_seg);
735
736 if (wr->num_sge > qp->rq.max_gs) {
737 ret = -1;
738 *bad_wr = wr;
739 goto out;
740 }
741
742 for (i = 0; i < wr->num_sge; ++i) {
743 ((struct mthca_data_seg *) wqe)->byte_count =
744 htonl(wr->sg_list[i].length);
745 ((struct mthca_data_seg *) wqe)->lkey =
746 htonl(wr->sg_list[i].lkey);
747 ((struct mthca_data_seg *) wqe)->addr =
748 htonll(wr->sg_list[i].addr);
749 wqe += sizeof (struct mthca_data_seg);
750 }
751
752 if (i < qp->rq.max_gs) {
753 ((struct mthca_data_seg *) wqe)->byte_count = 0;
754 ((struct mthca_data_seg *) wqe)->lkey = htonl(MTHCA_INVAL_LKEY);
755 ((struct mthca_data_seg *) wqe)->addr = 0;
756 }
757
758 qp->wrid[ind] = wr->wr_id;
759
760 ++ind;
761 if (ind >= qp->rq.max)
762 ind -= qp->rq.max;
763 }
764 out:
765 if (nreq) {
766 qp->rq.head += nreq;
767
768 /*
769 * Make sure that descriptors are written before
770 * doorbell record.
771 */
772 wmb();
773 *qp->rq.db = htonl(qp->rq.head & 0xffff);
774 }
775
776 pthread_spin_unlock(&qp->rq.lock);
777 return ret;
778 }
779
mthca_alloc_qp_buf(struct ibv_pd * pd,struct ibv_qp_cap * cap,enum ibv_qp_type type,struct mthca_qp * qp)780 int mthca_alloc_qp_buf(struct ibv_pd *pd, struct ibv_qp_cap *cap,
781 enum ibv_qp_type type, struct mthca_qp *qp)
782 {
783 int size;
784 int max_sq_sge;
785 struct mthca_next_seg *next;
786 int i;
787
788 qp->rq.max_gs = cap->max_recv_sge;
789 qp->sq.max_gs = cap->max_send_sge;
790 max_sq_sge = align(cap->max_inline_data + sizeof (struct mthca_inline_seg),
791 sizeof (struct mthca_data_seg)) / sizeof (struct mthca_data_seg);
792 if (max_sq_sge < cap->max_send_sge)
793 max_sq_sge = cap->max_send_sge;
794
795 qp->wrid = malloc((qp->rq.max + qp->sq.max) * sizeof (uint64_t));
796 if (!qp->wrid)
797 return -1;
798
799 size = sizeof (struct mthca_next_seg) +
800 qp->rq.max_gs * sizeof (struct mthca_data_seg);
801
802 for (qp->rq.wqe_shift = 6; 1 << qp->rq.wqe_shift < size;
803 qp->rq.wqe_shift++)
804 ; /* nothing */
805
806 size = max_sq_sge * sizeof (struct mthca_data_seg);
807 switch (type) {
808 case IBV_QPT_UD:
809 size += mthca_is_memfree(pd->context) ?
810 sizeof (struct mthca_arbel_ud_seg) :
811 sizeof (struct mthca_tavor_ud_seg);
812 break;
813
814 case IBV_QPT_UC:
815 size += sizeof (struct mthca_raddr_seg);
816 break;
817
818 case IBV_QPT_RC:
819 size += sizeof (struct mthca_raddr_seg);
820 /*
821 * An atomic op will require an atomic segment, a
822 * remote address segment and one scatter entry.
823 */
824 if (size < (sizeof (struct mthca_atomic_seg) +
825 sizeof (struct mthca_raddr_seg) +
826 sizeof (struct mthca_data_seg)))
827 size = (sizeof (struct mthca_atomic_seg) +
828 sizeof (struct mthca_raddr_seg) +
829 sizeof (struct mthca_data_seg));
830 break;
831
832 default:
833 break;
834 }
835
836 /* Make sure that we have enough space for a bind request */
837 if (size < sizeof (struct mthca_bind_seg))
838 size = sizeof (struct mthca_bind_seg);
839
840 size += sizeof (struct mthca_next_seg);
841
842 for (qp->sq.wqe_shift = 6; 1 << qp->sq.wqe_shift < size;
843 qp->sq.wqe_shift++)
844 ; /* nothing */
845
846 qp->send_wqe_offset = align(qp->rq.max << qp->rq.wqe_shift,
847 1 << qp->sq.wqe_shift);
848
849 qp->buf_size = qp->send_wqe_offset + (qp->sq.max << qp->sq.wqe_shift);
850
851 if (mthca_alloc_buf(&qp->buf,
852 align(qp->buf_size, to_mdev(pd->context->device)->page_size),
853 to_mdev(pd->context->device)->page_size)) {
854 free(qp->wrid);
855 return -1;
856 }
857
858 memset(qp->buf.buf, 0, qp->buf_size);
859
860 if (mthca_is_memfree(pd->context)) {
861 struct mthca_data_seg *scatter;
862 uint32_t sz;
863
864 sz = htonl((sizeof (struct mthca_next_seg) +
865 qp->rq.max_gs * sizeof (struct mthca_data_seg)) / 16);
866
867 for (i = 0; i < qp->rq.max; ++i) {
868 next = get_recv_wqe(qp, i);
869 next->nda_op = htonl(((i + 1) & (qp->rq.max - 1)) <<
870 qp->rq.wqe_shift);
871 next->ee_nds = sz;
872
873 for (scatter = (void *) (next + 1);
874 (void *) scatter < (void *) next + (1 << qp->rq.wqe_shift);
875 ++scatter)
876 scatter->lkey = htonl(MTHCA_INVAL_LKEY);
877 }
878
879 for (i = 0; i < qp->sq.max; ++i) {
880 next = get_send_wqe(qp, i);
881 next->nda_op = htonl((((i + 1) & (qp->sq.max - 1)) <<
882 qp->sq.wqe_shift) +
883 qp->send_wqe_offset);
884 }
885 } else {
886 for (i = 0; i < qp->rq.max; ++i) {
887 next = get_recv_wqe(qp, i);
888 next->nda_op = htonl((((i + 1) % qp->rq.max) <<
889 qp->rq.wqe_shift) | 1);
890 }
891 }
892
893 qp->sq.last = get_send_wqe(qp, qp->sq.max - 1);
894 qp->rq.last = get_recv_wqe(qp, qp->rq.max - 1);
895
896 return 0;
897 }
898
mthca_find_qp(struct mthca_context * ctx,uint32_t qpn)899 struct mthca_qp *mthca_find_qp(struct mthca_context *ctx, uint32_t qpn)
900 {
901 int tind = (qpn & (ctx->num_qps - 1)) >> ctx->qp_table_shift;
902
903 if (ctx->qp_table[tind].refcnt)
904 return ctx->qp_table[tind].table[qpn & ctx->qp_table_mask];
905 else
906 return NULL;
907 }
908
mthca_store_qp(struct mthca_context * ctx,uint32_t qpn,struct mthca_qp * qp)909 int mthca_store_qp(struct mthca_context *ctx, uint32_t qpn, struct mthca_qp *qp)
910 {
911 int tind = (qpn & (ctx->num_qps - 1)) >> ctx->qp_table_shift;
912
913 if (!ctx->qp_table[tind].refcnt) {
914 ctx->qp_table[tind].table = calloc(ctx->qp_table_mask + 1,
915 sizeof (struct mthca_qp *));
916 if (!ctx->qp_table[tind].table)
917 return -1;
918 }
919
920 ++ctx->qp_table[tind].refcnt;
921 ctx->qp_table[tind].table[qpn & ctx->qp_table_mask] = qp;
922 return 0;
923 }
924
mthca_clear_qp(struct mthca_context * ctx,uint32_t qpn)925 void mthca_clear_qp(struct mthca_context *ctx, uint32_t qpn)
926 {
927 int tind = (qpn & (ctx->num_qps - 1)) >> ctx->qp_table_shift;
928
929 if (!--ctx->qp_table[tind].refcnt)
930 free(ctx->qp_table[tind].table);
931 else
932 ctx->qp_table[tind].table[qpn & ctx->qp_table_mask] = NULL;
933 }
934
mthca_free_err_wqe(struct mthca_qp * qp,int is_send,int index,int * dbd,uint32_t * new_wqe)935 int mthca_free_err_wqe(struct mthca_qp *qp, int is_send,
936 int index, int *dbd, uint32_t *new_wqe)
937 {
938 struct mthca_next_seg *next;
939
940 /*
941 * For SRQs, all receive WQEs generate a CQE, so we're always
942 * at the end of the doorbell chain.
943 */
944 if (qp->ibv_qp.srq && !is_send) {
945 *new_wqe = 0;
946 return 0;
947 }
948
949 if (is_send)
950 next = get_send_wqe(qp, index);
951 else
952 next = get_recv_wqe(qp, index);
953
954 *dbd = !!(next->ee_nds & htonl(MTHCA_NEXT_DBD));
955 if (next->ee_nds & htonl(0x3f))
956 *new_wqe = (next->nda_op & htonl(~0x3f)) |
957 (next->ee_nds & htonl(0x3f));
958 else
959 *new_wqe = 0;
960
961 return 0;
962 }
963
964