1 /*
2 * Copyright (c) 2006-2014 Chelsio, Inc. All rights reserved.
3 *
4 * This software is available to you under a choice of one of two
5 * licenses. You may choose to be licensed under the terms of the GNU
6 * General Public License (GPL) Version 2, available from the file
7 * COPYING in the main directory of this source tree, or the
8 * OpenIB.org BSD license below:
9 *
10 * Redistribution and use in source and binary forms, with or
11 * without modification, are permitted provided that the following
12 * conditions are met:
13 *
14 * - Redistributions of source code must retain the above
15 * copyright notice, this list of conditions and the following
16 * disclaimer.
17 *
18 * - Redistributions in binary form must reproduce the above
19 * copyright notice, this list of conditions and the following
20 * disclaimer in the documentation and/or other materials
21 * provided with the distribution.
22 *
23 * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND,
24 * EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF
25 * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND
26 * NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS
27 * BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN
28 * ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN
29 * CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
30 * SOFTWARE.
31 */
32 #if HAVE_CONFIG_H
33 # include <config.h>
34 #endif /* HAVE_CONFIG_H */
35
36 #include <stdio.h>
37 #include <stdlib.h>
38 #include <unistd.h>
39 #include <errno.h>
40 #include <sys/mman.h>
41 #include <pthread.h>
42 #include <string.h>
43 #include <signal.h>
44
45 #include "libcxgb4.h"
46 #include "cxgb4-abi.h"
47
/* Vendor ID stored in every hca_table entry and matched at probe time. */
#define PCI_VENDOR_ID_CHELSIO	0x1425

/*
 * Building blocks for the PCI device ID table.  The table header included
 * further down is presumably what expands these into hca_table[]
 * (NOTE(review): confirm against t4_pci_id_tbl.h).
 */

/* Opens the table: declares the entry layout and starts the initializer. */
#define CH_PCI_DEVICE_ID_TABLE_DEFINE_BEGIN				\
	struct {							\
		unsigned int vendor;					\
		unsigned int device;					\
		unsigned int chip_version;				\
	} hca_table[] = {

/* PCI function number selected for this table. */
#define CH_PCI_DEVICE_ID_FUNCTION	0x4

/* One table row; the chip version is derived from the device ID. */
#define CH_PCI_ID_TABLE_ENTRY(__DeviceID)				\
	{								\
		.vendor = PCI_VENDOR_ID_CHELSIO,			\
		.device = (__DeviceID),					\
		.chip_version = CHELSIO_PCI_ID_CHIP_VERSION(__DeviceID),	\
	}

/* Closes the initializer opened by CH_PCI_DEVICE_ID_TABLE_DEFINE_BEGIN. */
#define CH_PCI_DEVICE_ID_TABLE_DEFINE_END				\
	}
72
73 #include "t4_chip_type.h"
74 #include "t4_pci_id_tbl.h"
75
/* System page geometry, filled in by cxgb4_register_driver(). */
unsigned long c4iw_page_size;
unsigned long c4iw_page_shift;
unsigned long c4iw_page_mask;

/* Tunables read from CXGB4_MA_WR and T5_ENABLE_WC in cxgb4_driver_init(). */
int ma_wr;
int t5_en_wc = 1;

/* Head of the list of devices claimed by cxgb4_driver_init(). */
SLIST_HEAD(devices_struct, c4iw_dev) devices;
83
84 static struct ibv_context_ops c4iw_ctx_ops = {
85 .query_device = c4iw_query_device,
86 .query_port = c4iw_query_port,
87 .alloc_pd = c4iw_alloc_pd,
88 .dealloc_pd = c4iw_free_pd,
89 .reg_mr = c4iw_reg_mr,
90 .dereg_mr = c4iw_dereg_mr,
91 .create_cq = c4iw_create_cq,
92 .resize_cq = c4iw_resize_cq,
93 .destroy_cq = c4iw_destroy_cq,
94 .create_srq = c4iw_create_srq,
95 .modify_srq = c4iw_modify_srq,
96 .destroy_srq = c4iw_destroy_srq,
97 .create_qp = c4iw_create_qp,
98 .modify_qp = c4iw_modify_qp,
99 .destroy_qp = c4iw_destroy_qp,
100 .query_qp = c4iw_query_qp,
101 .create_ah = c4iw_create_ah,
102 .destroy_ah = c4iw_destroy_ah,
103 .attach_mcast = c4iw_attach_mcast,
104 .detach_mcast = c4iw_detach_mcast,
105 .post_srq_recv = c4iw_post_srq_recv,
106 .req_notify_cq = c4iw_arm_cq,
107 };
108
c4iw_alloc_context(struct ibv_device * ibdev,int cmd_fd)109 static struct ibv_context *c4iw_alloc_context(struct ibv_device *ibdev,
110 int cmd_fd)
111 {
112 struct c4iw_context *context;
113 struct ibv_get_context cmd;
114 struct c4iw_alloc_ucontext_resp resp;
115 struct c4iw_dev *rhp = to_c4iw_dev(ibdev);
116 struct ibv_query_device qcmd;
117 uint64_t raw_fw_ver;
118 struct ibv_device_attr attr;
119
120 context = malloc(sizeof *context);
121 if (!context)
122 return NULL;
123
124 memset(context, 0, sizeof *context);
125 context->ibv_ctx.cmd_fd = cmd_fd;
126
127 resp.status_page_size = 0;
128 resp.reserved = 0;
129 if (ibv_cmd_get_context(&context->ibv_ctx, &cmd, sizeof cmd,
130 &resp.ibv_resp, sizeof resp))
131 goto err_free;
132
133 if (resp.reserved)
134 PDBG("%s c4iw_alloc_ucontext_resp reserved field modified by kernel\n",
135 __FUNCTION__);
136
137 context->status_page_size = resp.status_page_size;
138 if (resp.status_page_size) {
139 context->status_page = mmap(NULL, resp.status_page_size,
140 PROT_READ, MAP_SHARED, cmd_fd,
141 resp.status_page_key);
142 if (context->status_page == MAP_FAILED)
143 goto err_free;
144 }
145
146 context->ibv_ctx.device = ibdev;
147 context->ibv_ctx.ops = c4iw_ctx_ops;
148
149 switch (rhp->chip_version) {
150 case CHELSIO_T5:
151 PDBG("%s T5/T4 device\n", __FUNCTION__);
152 case CHELSIO_T4:
153 PDBG("%s T4 device\n", __FUNCTION__);
154 context->ibv_ctx.ops.async_event = c4iw_async_event;
155 context->ibv_ctx.ops.post_send = c4iw_post_send;
156 context->ibv_ctx.ops.post_recv = c4iw_post_receive;
157 context->ibv_ctx.ops.poll_cq = c4iw_poll_cq;
158 context->ibv_ctx.ops.req_notify_cq = c4iw_arm_cq;
159 break;
160 default:
161 PDBG("%s unknown hca type %d\n", __FUNCTION__,
162 rhp->chip_version);
163 goto err_unmap;
164 break;
165 }
166
167 if (!rhp->mmid2ptr) {
168 int ret;
169
170 ret = ibv_cmd_query_device(&context->ibv_ctx, &attr, &raw_fw_ver, &qcmd,
171 sizeof qcmd);
172 if (ret)
173 goto err_unmap;
174 rhp->max_mr = attr.max_mr;
175 rhp->mmid2ptr = calloc(attr.max_mr, sizeof(void *));
176 if (!rhp->mmid2ptr) {
177 goto err_unmap;
178 }
179 rhp->max_qp = T4_QID_BASE + attr.max_cq;
180 rhp->qpid2ptr = calloc(T4_QID_BASE + attr.max_cq, sizeof(void *));
181 if (!rhp->qpid2ptr) {
182 goto err_unmap;
183 }
184 rhp->max_cq = T4_QID_BASE + attr.max_cq;
185 rhp->cqid2ptr = calloc(T4_QID_BASE + attr.max_cq, sizeof(void *));
186 if (!rhp->cqid2ptr)
187 goto err_unmap;
188 }
189
190 return &context->ibv_ctx;
191
192 err_unmap:
193 munmap(context->status_page, context->status_page_size);
194 err_free:
195 if (rhp->cqid2ptr)
196 free(rhp->cqid2ptr);
197 if (rhp->qpid2ptr)
198 free(rhp->cqid2ptr);
199 if (rhp->mmid2ptr)
200 free(rhp->cqid2ptr);
201 free(context);
202 return NULL;
203 }
204
c4iw_free_context(struct ibv_context * ibctx)205 static void c4iw_free_context(struct ibv_context *ibctx)
206 {
207 struct c4iw_context *context = to_c4iw_context(ibctx);
208
209 if (context->status_page_size)
210 munmap(context->status_page, context->status_page_size);
211 free(context);
212 }
213
214 static struct ibv_device_ops c4iw_dev_ops = {
215 .alloc_context = c4iw_alloc_context,
216 .free_context = c4iw_free_context
217 };
218
219 #ifdef STALL_DETECTION
220
221 int stall_to;
222
dump_cq(struct c4iw_cq * chp)223 static void dump_cq(struct c4iw_cq *chp)
224 {
225 int i;
226
227 fprintf(stderr,
228 "CQ: %p id %u queue %p cidx 0x%08x sw_queue %p sw_cidx %d sw_pidx %d sw_in_use %d depth %u error %u gen %d "
229 "cidx_inc %d bits_type_ts %016" PRIx64 " notempty %d\n", chp,
230 chp->cq.cqid, chp->cq.queue, chp->cq.cidx,
231 chp->cq.sw_queue, chp->cq.sw_cidx, chp->cq.sw_pidx, chp->cq.sw_in_use,
232 chp->cq.size, chp->cq.error, chp->cq.gen, chp->cq.cidx_inc, be64_to_cpu(chp->cq.bits_type_ts),
233 t4_cq_notempty(&chp->cq) || (chp->iq ? t4_iq_notempty(chp->iq) : 0));
234
235 for (i=0; i < chp->cq.size; i++) {
236 u64 *p = (u64 *)(chp->cq.queue + i);
237
238 fprintf(stderr, "%02x: %016" PRIx64 " %016" PRIx64, i, be64_to_cpu(p[0]), be64_to_cpu(p[1]));
239 if (i == chp->cq.cidx)
240 fprintf(stderr, " <-- cidx\n");
241 else
242 fprintf(stderr, "\n");
243 p+= 2;
244 fprintf(stderr, "%02x: %016" PRIx64 " %016" PRIx64 "\n", i, be64_to_cpu(p[0]), be64_to_cpu(p[1]));
245 p+= 2;
246 fprintf(stderr, "%02x: %016" PRIx64 " %016" PRIx64 "\n", i, be64_to_cpu(p[0]), be64_to_cpu(p[1]));
247 p+= 2;
248 fprintf(stderr, "%02x: %016" PRIx64 " %016" PRIx64 "\n", i, be64_to_cpu(p[0]), be64_to_cpu(p[1]));
249 p+= 2;
250 }
251 }
252
dump_qp(struct c4iw_qp * qhp)253 static void dump_qp(struct c4iw_qp *qhp)
254 {
255 int i;
256 int j;
257 struct t4_swsqe *swsqe;
258 struct t4_swrqe *swrqe;
259 u16 cidx, pidx;
260 u64 *p;
261
262 fprintf(stderr,
263 "QP: %p id %u error %d flushed %d qid_mask 0x%x\n"
264 " SQ: id %u queue %p sw_queue %p cidx %u pidx %u in_use %u wq_pidx %u depth %u flags 0x%x flush_cidx %d\n"
265 " RQ: id %u queue %p sw_queue %p cidx %u pidx %u in_use %u depth %u\n",
266 qhp,
267 qhp->wq.sq.qid,
268 qhp->wq.error,
269 qhp->wq.flushed,
270 qhp->wq.qid_mask,
271 qhp->wq.sq.qid,
272 qhp->wq.sq.queue,
273 qhp->wq.sq.sw_sq,
274 qhp->wq.sq.cidx,
275 qhp->wq.sq.pidx,
276 qhp->wq.sq.in_use,
277 qhp->wq.sq.wq_pidx,
278 qhp->wq.sq.size,
279 qhp->wq.sq.flags,
280 qhp->wq.sq.flush_cidx,
281 qhp->wq.rq.qid,
282 qhp->wq.rq.queue,
283 qhp->wq.rq.sw_rq,
284 qhp->wq.rq.cidx,
285 qhp->wq.rq.pidx,
286 qhp->wq.rq.in_use,
287 qhp->wq.rq.size);
288 cidx = qhp->wq.sq.cidx;
289 pidx = qhp->wq.sq.pidx;
290 if (cidx != pidx)
291 fprintf(stderr, "SQ: \n");
292 while (cidx != pidx) {
293 swsqe = &qhp->wq.sq.sw_sq[cidx];
294 fprintf(stderr, "%04u: wr_id %016" PRIx64
295 " sq_wptr %08x read_len %u opcode 0x%x "
296 "complete %u signaled %u cqe %016" PRIx64 " %016" PRIx64 " %016" PRIx64 " %016" PRIx64 "\n",
297 cidx,
298 swsqe->wr_id,
299 swsqe->idx,
300 swsqe->read_len,
301 swsqe->opcode,
302 swsqe->complete,
303 swsqe->signaled,
304 cpu_to_be64(swsqe->cqe.u.flits[0]),
305 cpu_to_be64(swsqe->cqe.u.flits[1]),
306 cpu_to_be64((u64)swsqe->cqe.reserved),
307 cpu_to_be64(swsqe->cqe.bits_type_ts));
308 if (++cidx == qhp->wq.sq.size)
309 cidx = 0;
310 }
311
312 fprintf(stderr, "SQ WQ: \n");
313 p = (u64 *)qhp->wq.sq.queue;
314 for (i=0; i < qhp->wq.sq.size * T4_SQ_NUM_SLOTS; i++) {
315 for (j=0; j < T4_EQ_ENTRY_SIZE / 16; j++) {
316 fprintf(stderr, "%04u %016" PRIx64 " %016" PRIx64 " ",
317 i, ntohll(p[0]), ntohll(p[1]));
318 if (j == 0 && i == qhp->wq.sq.wq_pidx)
319 fprintf(stderr, " <-- pidx");
320 fprintf(stderr, "\n");
321 p += 2;
322 }
323 }
324 cidx = qhp->wq.rq.cidx;
325 pidx = qhp->wq.rq.pidx;
326 if (cidx != pidx)
327 fprintf(stderr, "RQ: \n");
328 while (cidx != pidx) {
329 swrqe = &qhp->wq.rq.sw_rq[cidx];
330 fprintf(stderr, "%04u: wr_id %016" PRIx64 "\n",
331 cidx,
332 swrqe->wr_id );
333 if (++cidx == qhp->wq.rq.size)
334 cidx = 0;
335 }
336
337 fprintf(stderr, "RQ WQ: \n");
338 p = (u64 *)qhp->wq.rq.queue;
339 for (i=0; i < qhp->wq.rq.size * T4_RQ_NUM_SLOTS; i++) {
340 for (j=0; j < T4_EQ_ENTRY_SIZE / 16; j++) {
341 fprintf(stderr, "%04u %016" PRIx64 " %016" PRIx64 " ",
342 i, ntohll(p[0]), ntohll(p[1]));
343 if (j == 0 && i == qhp->wq.rq.pidx)
344 fprintf(stderr, " <-- pidx");
345 if (j == 0 && i == qhp->wq.rq.cidx)
346 fprintf(stderr, " <-- cidx");
347 fprintf(stderr, "\n");
348 p+=2;
349 }
350 }
351 }
352
dump_state()353 void dump_state()
354 {
355 struct c4iw_dev *dev;
356 int i;
357
358 fprintf(stderr, "STALL DETECTED:\n");
359 SLIST_FOREACH(dev, &devices, list) {
360 //pthread_spin_lock(&dev->lock);
361 fprintf(stderr, "Device %s\n", dev->ibv_dev.name);
362 for (i=0; i < dev->max_cq; i++) {
363 if (dev->cqid2ptr[i]) {
364 struct c4iw_cq *chp = dev->cqid2ptr[i];
365 //pthread_spin_lock(&chp->lock);
366 dump_cq(chp);
367 //pthread_spin_unlock(&chp->lock);
368 }
369 }
370 for (i=0; i < dev->max_qp; i++) {
371 if (dev->qpid2ptr[i]) {
372 struct c4iw_qp *qhp = dev->qpid2ptr[i];
373 //pthread_spin_lock(&qhp->lock);
374 dump_qp(qhp);
375 //pthread_spin_unlock(&qhp->lock);
376 }
377 }
378 //pthread_spin_unlock(&dev->lock);
379 }
380 fprintf(stderr, "DUMP COMPLETE:\n");
381 fflush(stderr);
382 }
383 #endif /* end of STALL_DETECTION */
384
/*
 * ABI version of the iw_cxgb4 kernel driver.  It is recorded so that the
 * user-mode library can tell whether the driver supports kernel-mode
 * doorbell ringing.
 */
int c4iw_abi_version = 1;
390
cxgb4_driver_init(const char * uverbs_sys_path,int abi_version)391 static struct ibv_device *cxgb4_driver_init(const char *uverbs_sys_path,
392 int abi_version)
393 {
394 char devstr[IBV_SYSFS_PATH_MAX], ibdev[16], value[128], *cp;
395 char t5nexstr[IBV_SYSFS_PATH_MAX];
396 struct c4iw_dev *dev;
397 unsigned vendor, device, fw_maj, fw_min;
398 int i;
399 char devnum=0;
400 char ib_param[16];
401
402 #ifndef __linux__
403 if (ibv_read_sysfs_file(uverbs_sys_path, "ibdev",
404 ibdev, sizeof ibdev) < 0)
405 return NULL;
406 /*
407 * Extract the non-numeric part of ibdev
408 * say "t5nex0" -> devname=="t5nex", devnum=0
409 */
410 if (strstr(ibdev,"t5nex")) {
411 devnum = atoi(ibdev+strlen("t5nex"));
412 sprintf(t5nexstr, "/dev/t5nex/%d", devnum);
413 } else
414 return NULL;
415
416 if (ibv_read_sysfs_file(t5nexstr, "\%pnpinfo",
417 value, sizeof value) < 0)
418 return NULL;
419 else {
420 if (strstr(value,"vendor=")) {
421 strncpy(ib_param, strstr(value,"vendor=")+strlen("vendor="),6);
422 sscanf(ib_param,"%i",&vendor);
423 }
424
425 if (strstr(value,"device=")) {
426 strncpy(ib_param, strstr(value,"device=")+strlen("device="),6);
427 sscanf(ib_param,"%i",&device);
428 }
429 }
430 #else
431 if (ibv_read_sysfs_file(uverbs_sys_path, "device/vendor",
432 value, sizeof value) < 0)
433 return NULL;
434 sscanf(value, "%i", &vendor);
435
436 if (ibv_read_sysfs_file(uverbs_sys_path, "device/device",
437 value, sizeof value) < 0)
438 return NULL;
439 sscanf(value, "%i", &device);
440 #endif
441
442 for (i = 0; i < sizeof hca_table / sizeof hca_table[0]; ++i)
443 if (vendor == hca_table[i].vendor &&
444 device == hca_table[i].device)
445 goto found;
446
447 return NULL;
448
449 found:
450 c4iw_abi_version = abi_version;
451
452
453 #ifndef __linux__
454 if (ibv_read_sysfs_file(t5nexstr, "firmware_version",
455 value, sizeof value) < 0)
456 return NULL;
457 #else
458 /*
459 * Verify that the firmware major number matches. Major number
460 * mismatches are fatal. Minor number mismatches are tolerated.
461 */
462 if (ibv_read_sysfs_file(uverbs_sys_path, "ibdev",
463 ibdev, sizeof ibdev) < 0)
464 return NULL;
465
466 memset(devstr, 0, sizeof devstr);
467 snprintf(devstr, sizeof devstr, "%s/class/infiniband/%s",
468 ibv_get_sysfs_path(), ibdev);
469 if (ibv_read_sysfs_file(devstr, "fw_ver", value, sizeof value) < 0)
470 return NULL;
471 #endif
472
473 cp = strtok(value+1, ".");
474 sscanf(cp, "%i", &fw_maj);
475 cp = strtok(NULL, ".");
476 sscanf(cp, "%i", &fw_min);
477
478 if (fw_maj < FW_MAJ) {
479 fprintf(stderr, "libcxgb4: Fatal firmware version mismatch. "
480 "Firmware major number is %u and libcxgb4 needs %u.\n",
481 fw_maj, FW_MAJ);
482 fflush(stderr);
483 return NULL;
484 }
485
486 DBGLOG("libcxgb4");
487
488 if (fw_min < FW_MIN) {
489 PDBG("libcxgb4: non-fatal firmware version mismatch. "
490 "Firmware minor number is %u and libcxgb4 needs %u.\n",
491 fw_maj, FW_MAJ);
492 fflush(stderr);
493 }
494
495 PDBG("%s found vendor %d device %d type %d\n",
496 __FUNCTION__, vendor, device, hca_table[i].chip_version);
497
498 dev = calloc(1, sizeof *dev);
499 if (!dev) {
500 return NULL;
501 }
502
503 pthread_spin_init(&dev->lock, PTHREAD_PROCESS_PRIVATE);
504 dev->ibv_dev.ops = c4iw_dev_ops;
505 dev->chip_version = hca_table[i].chip_version;
506 dev->abi_version = abi_version;
507
508 PDBG("%s device claimed\n", __FUNCTION__);
509 SLIST_INSERT_HEAD(&devices, dev, list);
510 #ifdef STALL_DETECTION
511 {
512 char *c = getenv("CXGB4_STALL_TIMEOUT");
513 if (c) {
514 stall_to = strtol(c, NULL, 0);
515 if (errno || stall_to < 0)
516 stall_to = 0;
517 }
518 }
519 #endif
520 {
521 char *c = getenv("CXGB4_MA_WR");
522 if (c) {
523 ma_wr = strtol(c, NULL, 0);
524 if (ma_wr != 1)
525 ma_wr = 0;
526 }
527 }
528 {
529 char *c = getenv("T5_ENABLE_WC");
530 if (c) {
531 t5_en_wc = strtol(c, NULL, 0);
532 if (t5_en_wc != 1)
533 t5_en_wc = 0;
534 }
535 }
536
537 return &dev->ibv_dev;
538 }
539
cxgb4_register_driver(void)540 static __attribute__((constructor)) void cxgb4_register_driver(void)
541 {
542 c4iw_page_size = sysconf(_SC_PAGESIZE);
543 c4iw_page_shift = long_log2(c4iw_page_size);
544 c4iw_page_mask = ~(c4iw_page_size - 1);
545 ibv_register_driver("cxgb4", cxgb4_driver_init);
546 }
547
548 #ifdef STATS
549 void __attribute__ ((destructor)) cs_fini(void);
cs_fini(void)550 void __attribute__ ((destructor)) cs_fini(void)
551 {
552 syslog(LOG_NOTICE, "cxgb4 stats - sends %lu recv %lu read %lu "
553 "write %lu arm %lu cqe %lu mr %lu qp %lu cq %lu\n",
554 c4iw_stats.send, c4iw_stats.recv, c4iw_stats.read,
555 c4iw_stats.write, c4iw_stats.arm, c4iw_stats.cqe,
556 c4iw_stats.mr, c4iw_stats.qp, c4iw_stats.cq);
557 }
558 #endif
559