xref: /NextBSD/contrib/ofed/libcxgb4/src/dev.c (revision 4bf303e5af1834cdd3092175eeca7676420229c4)
1 /*
2  * Copyright (c) 2006-2014 Chelsio, Inc. All rights reserved.
3  *
4  * This software is available to you under a choice of one of two
5  * licenses.  You may choose to be licensed under the terms of the GNU
6  * General Public License (GPL) Version 2, available from the file
7  * COPYING in the main directory of this source tree, or the
8  * OpenIB.org BSD license below:
9  *
10  *     Redistribution and use in source and binary forms, with or
11  *     without modification, are permitted provided that the following
12  *     conditions are met:
13  *
14  *      - Redistributions of source code must retain the above
15  *        copyright notice, this list of conditions and the following
16  *        disclaimer.
17  *
18  *      - Redistributions in binary form must reproduce the above
19  *        copyright notice, this list of conditions and the following
20  *        disclaimer in the documentation and/or other materials
21  *        provided with the distribution.
22  *
23  * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND,
24  * EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF
25  * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND
26  * NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS
27  * BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN
28  * ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN
29  * CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
30  * SOFTWARE.
31  */
32 #if HAVE_CONFIG_H
33 #  include <config.h>
34 #endif				/* HAVE_CONFIG_H */
35 
36 #include <stdio.h>
37 #include <stdlib.h>
38 #include <unistd.h>
39 #include <errno.h>
40 #include <sys/mman.h>
41 #include <pthread.h>
42 #include <string.h>
43 #include <signal.h>
44 
45 #include "libcxgb4.h"
46 #include "cxgb4-abi.h"
47 
#define PCI_VENDOR_ID_CHELSIO		0x1425

/*
 * Building blocks for the PCI device ID table.  Expanding BEGIN, a list of
 * CH_PCI_ID_TABLE_ENTRY() items and END yields the definition of
 * hca_table[], an array of { vendor, device, chip_version } records.
 * (Presumably the expansion is driven by the t4_pci_id_tbl.h header.)
 */

/* Opens the hca_table[] definition and its initialiser list. */
#define CH_PCI_DEVICE_ID_TABLE_DEFINE_BEGIN				\
	struct {							\
		unsigned vendor;					\
		unsigned device;					\
		unsigned chip_version;					\
	} hca_table[] = {

/* PCI function value made available to the table header. */
#define CH_PCI_DEVICE_ID_FUNCTION	0x4

/*
 * One table record: the vendor is always Chelsio and the chip version is
 * derived from the device ID by CHELSIO_PCI_ID_CHIP_VERSION().
 */
#define CH_PCI_ID_TABLE_ENTRY(devid)					\
		{							\
			.vendor = PCI_VENDOR_ID_CHELSIO,		\
			.device = (devid),				\
			.chip_version = CHELSIO_PCI_ID_CHIP_VERSION(devid), \
		}

/* Closes the initialiser list opened by the BEGIN macro. */
#define CH_PCI_DEVICE_ID_TABLE_DEFINE_END				\
	}
72 
73 #include "t4_chip_type.h"
74 #include "t4_pci_id_tbl.h"
75 
76 unsigned long c4iw_page_size;
77 unsigned long c4iw_page_shift;
78 unsigned long c4iw_page_mask;
79 int ma_wr;
80 int t5_en_wc = 1;
81 
82 SLIST_HEAD(devices_struct, c4iw_dev) devices;
83 
84 static struct ibv_context_ops c4iw_ctx_ops = {
85 	.query_device = c4iw_query_device,
86 	.query_port = c4iw_query_port,
87 	.alloc_pd = c4iw_alloc_pd,
88 	.dealloc_pd = c4iw_free_pd,
89 	.reg_mr = c4iw_reg_mr,
90 	.dereg_mr = c4iw_dereg_mr,
91 	.create_cq = c4iw_create_cq,
92 	.resize_cq = c4iw_resize_cq,
93 	.destroy_cq = c4iw_destroy_cq,
94 	.create_srq = c4iw_create_srq,
95 	.modify_srq = c4iw_modify_srq,
96 	.destroy_srq = c4iw_destroy_srq,
97 	.create_qp = c4iw_create_qp,
98 	.modify_qp = c4iw_modify_qp,
99 	.destroy_qp = c4iw_destroy_qp,
100 	.query_qp = c4iw_query_qp,
101 	.create_ah = c4iw_create_ah,
102 	.destroy_ah = c4iw_destroy_ah,
103 	.attach_mcast = c4iw_attach_mcast,
104 	.detach_mcast = c4iw_detach_mcast,
105 	.post_srq_recv = c4iw_post_srq_recv,
106 	.req_notify_cq = c4iw_arm_cq,
107 };
108 
c4iw_alloc_context(struct ibv_device * ibdev,int cmd_fd)109 static struct ibv_context *c4iw_alloc_context(struct ibv_device *ibdev,
110 					      int cmd_fd)
111 {
112 	struct c4iw_context *context;
113 	struct ibv_get_context cmd;
114 	struct c4iw_alloc_ucontext_resp resp;
115 	struct c4iw_dev *rhp = to_c4iw_dev(ibdev);
116 	struct ibv_query_device qcmd;
117 	uint64_t raw_fw_ver;
118 	struct ibv_device_attr attr;
119 
120 	context = malloc(sizeof *context);
121 	if (!context)
122 		return NULL;
123 
124 	memset(context, 0, sizeof *context);
125 	context->ibv_ctx.cmd_fd = cmd_fd;
126 
127 	resp.status_page_size = 0;
128 	resp.reserved = 0;
129 	if (ibv_cmd_get_context(&context->ibv_ctx, &cmd, sizeof cmd,
130 				&resp.ibv_resp, sizeof resp))
131 		goto err_free;
132 
133 	if (resp.reserved)
134 		PDBG("%s c4iw_alloc_ucontext_resp reserved field modified by kernel\n",
135 		     __FUNCTION__);
136 
137 	context->status_page_size = resp.status_page_size;
138 	if (resp.status_page_size) {
139 		context->status_page = mmap(NULL, resp.status_page_size,
140 					    PROT_READ, MAP_SHARED, cmd_fd,
141 					    resp.status_page_key);
142 		if (context->status_page == MAP_FAILED)
143 			goto err_free;
144 	}
145 
146 	context->ibv_ctx.device = ibdev;
147 	context->ibv_ctx.ops = c4iw_ctx_ops;
148 
149 	switch (rhp->chip_version) {
150 	case CHELSIO_T5:
151 		PDBG("%s T5/T4 device\n", __FUNCTION__);
152 	case CHELSIO_T4:
153 		PDBG("%s T4 device\n", __FUNCTION__);
154 		context->ibv_ctx.ops.async_event = c4iw_async_event;
155 		context->ibv_ctx.ops.post_send = c4iw_post_send;
156 		context->ibv_ctx.ops.post_recv = c4iw_post_receive;
157 		context->ibv_ctx.ops.poll_cq = c4iw_poll_cq;
158 		context->ibv_ctx.ops.req_notify_cq = c4iw_arm_cq;
159 		break;
160 	default:
161 		PDBG("%s unknown hca type %d\n", __FUNCTION__,
162 		     rhp->chip_version);
163 		goto err_unmap;
164 		break;
165 	}
166 
167 	if (!rhp->mmid2ptr) {
168 		int ret;
169 
170 		ret = ibv_cmd_query_device(&context->ibv_ctx, &attr, &raw_fw_ver, &qcmd,
171 					   sizeof qcmd);
172 		if (ret)
173 			goto err_unmap;
174 		rhp->max_mr = attr.max_mr;
175 		rhp->mmid2ptr = calloc(attr.max_mr, sizeof(void *));
176 		if (!rhp->mmid2ptr) {
177 			goto err_unmap;
178 		}
179 		rhp->max_qp = T4_QID_BASE + attr.max_cq;
180 		rhp->qpid2ptr = calloc(T4_QID_BASE + attr.max_cq, sizeof(void *));
181 		if (!rhp->qpid2ptr) {
182 			goto err_unmap;
183 		}
184 		rhp->max_cq = T4_QID_BASE + attr.max_cq;
185 		rhp->cqid2ptr = calloc(T4_QID_BASE + attr.max_cq, sizeof(void *));
186 		if (!rhp->cqid2ptr)
187 			goto err_unmap;
188 	}
189 
190 	return &context->ibv_ctx;
191 
192 err_unmap:
193 	munmap(context->status_page, context->status_page_size);
194 err_free:
195 	if (rhp->cqid2ptr)
196 		free(rhp->cqid2ptr);
197 	if (rhp->qpid2ptr)
198 		free(rhp->cqid2ptr);
199 	if (rhp->mmid2ptr)
200 		free(rhp->cqid2ptr);
201 	free(context);
202 	return NULL;
203 }
204 
c4iw_free_context(struct ibv_context * ibctx)205 static void c4iw_free_context(struct ibv_context *ibctx)
206 {
207 	struct c4iw_context *context = to_c4iw_context(ibctx);
208 
209 	if (context->status_page_size)
210 		munmap(context->status_page, context->status_page_size);
211 	free(context);
212 }
213 
214 static struct ibv_device_ops c4iw_dev_ops = {
215 	.alloc_context = c4iw_alloc_context,
216 	.free_context = c4iw_free_context
217 };
218 
219 #ifdef STALL_DETECTION
220 
221 int stall_to;
222 
dump_cq(struct c4iw_cq * chp)223 static void dump_cq(struct c4iw_cq *chp)
224 {
225 	int i;
226 
227 	fprintf(stderr,
228  		"CQ: %p id %u queue %p cidx 0x%08x sw_queue %p sw_cidx %d sw_pidx %d sw_in_use %d depth %u error %u gen %d "
229 		"cidx_inc %d bits_type_ts %016" PRIx64 " notempty %d\n", chp,
230                 chp->cq.cqid, chp->cq.queue, chp->cq.cidx,
231 	 	chp->cq.sw_queue, chp->cq.sw_cidx, chp->cq.sw_pidx, chp->cq.sw_in_use,
232                 chp->cq.size, chp->cq.error, chp->cq.gen, chp->cq.cidx_inc, be64_to_cpu(chp->cq.bits_type_ts),
233 		t4_cq_notempty(&chp->cq) || (chp->iq ? t4_iq_notempty(chp->iq) : 0));
234 
235 	for (i=0; i < chp->cq.size; i++) {
236 		u64 *p = (u64 *)(chp->cq.queue + i);
237 
238 		fprintf(stderr, "%02x: %016" PRIx64 " %016" PRIx64, i, be64_to_cpu(p[0]), be64_to_cpu(p[1]));
239 		if (i == chp->cq.cidx)
240 			fprintf(stderr, " <-- cidx\n");
241 		else
242 			fprintf(stderr, "\n");
243 		p+= 2;
244 		fprintf(stderr, "%02x: %016" PRIx64 " %016" PRIx64 "\n", i, be64_to_cpu(p[0]), be64_to_cpu(p[1]));
245 		p+= 2;
246 		fprintf(stderr, "%02x: %016" PRIx64 " %016" PRIx64 "\n", i, be64_to_cpu(p[0]), be64_to_cpu(p[1]));
247 		p+= 2;
248 		fprintf(stderr, "%02x: %016" PRIx64 " %016" PRIx64 "\n", i, be64_to_cpu(p[0]), be64_to_cpu(p[1]));
249 		p+= 2;
250 	}
251 }
252 
dump_qp(struct c4iw_qp * qhp)253 static void dump_qp(struct c4iw_qp *qhp)
254 {
255 	int i;
256 	int j;
257 	struct t4_swsqe *swsqe;
258 	struct t4_swrqe *swrqe;
259 	u16 cidx, pidx;
260 	u64 *p;
261 
262 	fprintf(stderr,
263 		"QP: %p id %u error %d flushed %d qid_mask 0x%x\n"
264 		"    SQ: id %u queue %p sw_queue %p cidx %u pidx %u in_use %u wq_pidx %u depth %u flags 0x%x flush_cidx %d\n"
265 		"    RQ: id %u queue %p sw_queue %p cidx %u pidx %u in_use %u depth %u\n",
266 		qhp,
267 		qhp->wq.sq.qid,
268 		qhp->wq.error,
269 		qhp->wq.flushed,
270 		qhp->wq.qid_mask,
271 		qhp->wq.sq.qid,
272 		qhp->wq.sq.queue,
273 		qhp->wq.sq.sw_sq,
274 		qhp->wq.sq.cidx,
275 		qhp->wq.sq.pidx,
276 		qhp->wq.sq.in_use,
277 		qhp->wq.sq.wq_pidx,
278 		qhp->wq.sq.size,
279 		qhp->wq.sq.flags,
280 		qhp->wq.sq.flush_cidx,
281 		qhp->wq.rq.qid,
282 		qhp->wq.rq.queue,
283 		qhp->wq.rq.sw_rq,
284 		qhp->wq.rq.cidx,
285 		qhp->wq.rq.pidx,
286 		qhp->wq.rq.in_use,
287 		qhp->wq.rq.size);
288 	cidx = qhp->wq.sq.cidx;
289 	pidx = qhp->wq.sq.pidx;
290 	if (cidx != pidx)
291 		fprintf(stderr, "SQ: \n");
292 	while (cidx != pidx) {
293 		swsqe = &qhp->wq.sq.sw_sq[cidx];
294 		fprintf(stderr, "%04u: wr_id %016" PRIx64
295 			" sq_wptr %08x read_len %u opcode 0x%x "
296 			"complete %u signaled %u cqe %016" PRIx64 " %016" PRIx64 " %016" PRIx64 " %016" PRIx64 "\n",
297 			cidx,
298 			swsqe->wr_id,
299 			swsqe->idx,
300 			swsqe->read_len,
301 			swsqe->opcode,
302 			swsqe->complete,
303 			swsqe->signaled,
304 			cpu_to_be64(swsqe->cqe.u.flits[0]),
305 			cpu_to_be64(swsqe->cqe.u.flits[1]),
306 			cpu_to_be64((u64)swsqe->cqe.reserved),
307 			cpu_to_be64(swsqe->cqe.bits_type_ts));
308 		if (++cidx == qhp->wq.sq.size)
309 			cidx = 0;
310 	}
311 
312 	fprintf(stderr, "SQ WQ: \n");
313 	p = (u64 *)qhp->wq.sq.queue;
314 	for (i=0; i < qhp->wq.sq.size * T4_SQ_NUM_SLOTS; i++) {
315 		for (j=0; j < T4_EQ_ENTRY_SIZE / 16; j++) {
316 			fprintf(stderr, "%04u %016" PRIx64 " %016" PRIx64 " ",
317 				i, ntohll(p[0]), ntohll(p[1]));
318 			if (j == 0 && i == qhp->wq.sq.wq_pidx)
319 				fprintf(stderr, " <-- pidx");
320 			fprintf(stderr, "\n");
321 			p += 2;
322 		}
323 	}
324 	cidx = qhp->wq.rq.cidx;
325 	pidx = qhp->wq.rq.pidx;
326 	if (cidx != pidx)
327 		fprintf(stderr, "RQ: \n");
328 	while (cidx != pidx) {
329 		swrqe = &qhp->wq.rq.sw_rq[cidx];
330 		fprintf(stderr, "%04u: wr_id %016" PRIx64 "\n",
331 			cidx,
332 			swrqe->wr_id );
333 		if (++cidx == qhp->wq.rq.size)
334 			cidx = 0;
335 	}
336 
337 	fprintf(stderr, "RQ WQ: \n");
338 	p = (u64 *)qhp->wq.rq.queue;
339 	for (i=0; i < qhp->wq.rq.size * T4_RQ_NUM_SLOTS; i++) {
340 		for (j=0; j < T4_EQ_ENTRY_SIZE / 16; j++) {
341 			fprintf(stderr, "%04u %016" PRIx64 " %016" PRIx64 " ",
342 				i, ntohll(p[0]), ntohll(p[1]));
343 			if (j == 0 && i == qhp->wq.rq.pidx)
344 				fprintf(stderr, " <-- pidx");
345 			if (j == 0 && i == qhp->wq.rq.cidx)
346 				fprintf(stderr, " <-- cidx");
347 			fprintf(stderr, "\n");
348 			p+=2;
349 		}
350 	}
351 }
352 
dump_state()353 void dump_state()
354 {
355 	struct c4iw_dev *dev;
356 	int i;
357 
358 	fprintf(stderr, "STALL DETECTED:\n");
359 	SLIST_FOREACH(dev, &devices, list) {
360 		//pthread_spin_lock(&dev->lock);
361 		fprintf(stderr, "Device %s\n", dev->ibv_dev.name);
362 		for (i=0; i < dev->max_cq; i++) {
363 			if (dev->cqid2ptr[i]) {
364 				struct c4iw_cq *chp = dev->cqid2ptr[i];
365 				//pthread_spin_lock(&chp->lock);
366 				dump_cq(chp);
367 				//pthread_spin_unlock(&chp->lock);
368 			}
369 		}
370 		for (i=0; i < dev->max_qp; i++) {
371 			if (dev->qpid2ptr[i]) {
372 				struct c4iw_qp *qhp = dev->qpid2ptr[i];
373 				//pthread_spin_lock(&qhp->lock);
374 				dump_qp(qhp);
375 				//pthread_spin_unlock(&qhp->lock);
376 			}
377 		}
378 		//pthread_spin_unlock(&dev->lock);
379 	}
380 	fprintf(stderr, "DUMP COMPLETE:\n");
381 	fflush(stderr);
382 }
383 #endif /* end of STALL_DETECTION */
384 
385 /*
386  * c4iw_abi_version is used to store ABI for iw_cxgb4 so the user mode library
387  * can know if the driver supports the kernel mode db ringing.
388  */
389 int c4iw_abi_version = 1;
390 
cxgb4_driver_init(const char * uverbs_sys_path,int abi_version)391 static struct ibv_device *cxgb4_driver_init(const char *uverbs_sys_path,
392 					    int abi_version)
393 {
394 	char devstr[IBV_SYSFS_PATH_MAX], ibdev[16], value[128], *cp;
395 	char t5nexstr[IBV_SYSFS_PATH_MAX];
396 	struct c4iw_dev *dev;
397 	unsigned vendor, device, fw_maj, fw_min;
398 	int i;
399 	char devnum=0;
400         char ib_param[16];
401 
402 #ifndef __linux__
403 	if (ibv_read_sysfs_file(uverbs_sys_path, "ibdev",
404 				ibdev, sizeof ibdev) < 0)
405 		return NULL;
406 	/*
407 	 * Extract the non-numeric part of ibdev
408 	 * say "t5nex0" -> devname=="t5nex", devnum=0
409 	 */
410 	if (strstr(ibdev,"t5nex")) {
411 		devnum = atoi(ibdev+strlen("t5nex"));
412 		sprintf(t5nexstr, "/dev/t5nex/%d", devnum);
413 	} else
414 		return NULL;
415 
416 	if (ibv_read_sysfs_file(t5nexstr, "\%pnpinfo",
417 				value, sizeof value) < 0)
418 		return NULL;
419 	else {
420 		if (strstr(value,"vendor=")) {
421 			strncpy(ib_param, strstr(value,"vendor=")+strlen("vendor="),6);
422 			sscanf(ib_param,"%i",&vendor);
423 		}
424 
425 		if (strstr(value,"device=")) {
426 			strncpy(ib_param, strstr(value,"device=")+strlen("device="),6);
427 			sscanf(ib_param,"%i",&device);
428 		}
429 	}
430 #else
431 	if (ibv_read_sysfs_file(uverbs_sys_path, "device/vendor",
432 				value, sizeof value) < 0)
433 		return NULL;
434 	sscanf(value, "%i", &vendor);
435 
436 	if (ibv_read_sysfs_file(uverbs_sys_path, "device/device",
437 				value, sizeof value) < 0)
438 		return NULL;
439 	sscanf(value, "%i", &device);
440 #endif
441 
442 	for (i = 0; i < sizeof hca_table / sizeof hca_table[0]; ++i)
443 		if (vendor == hca_table[i].vendor &&
444 		    device == hca_table[i].device)
445 			goto found;
446 
447 	return NULL;
448 
449 found:
450 	c4iw_abi_version = abi_version;
451 
452 
453 #ifndef __linux__
454 	if (ibv_read_sysfs_file(t5nexstr, "firmware_version",
455 				value, sizeof value) < 0)
456 		return NULL;
457 #else
458 	/*
459 	 * Verify that the firmware major number matches.  Major number
460 	 * mismatches are fatal.  Minor number mismatches are tolerated.
461 	 */
462 	if (ibv_read_sysfs_file(uverbs_sys_path, "ibdev",
463 				ibdev, sizeof ibdev) < 0)
464 		return NULL;
465 
466 	memset(devstr, 0, sizeof devstr);
467 	snprintf(devstr, sizeof devstr, "%s/class/infiniband/%s",
468 		 ibv_get_sysfs_path(), ibdev);
469 	if (ibv_read_sysfs_file(devstr, "fw_ver", value, sizeof value) < 0)
470 		return NULL;
471 #endif
472 
473 	cp = strtok(value+1, ".");
474 	sscanf(cp, "%i", &fw_maj);
475 	cp = strtok(NULL, ".");
476 	sscanf(cp, "%i", &fw_min);
477 
478 	if (fw_maj < FW_MAJ) {
479 		fprintf(stderr, "libcxgb4: Fatal firmware version mismatch.  "
480 			"Firmware major number is %u and libcxgb4 needs %u.\n",
481 			fw_maj, FW_MAJ);
482 		fflush(stderr);
483 		return NULL;
484 	}
485 
486 	DBGLOG("libcxgb4");
487 
488 	if (fw_min < FW_MIN) {
489 		PDBG("libcxgb4: non-fatal firmware version mismatch.  "
490 			"Firmware minor number is %u and libcxgb4 needs %u.\n",
491 			fw_maj, FW_MAJ);
492 		fflush(stderr);
493 	}
494 
495 	PDBG("%s found vendor %d device %d type %d\n",
496 	     __FUNCTION__, vendor, device, hca_table[i].chip_version);
497 
498 	dev = calloc(1, sizeof *dev);
499 	if (!dev) {
500 		return NULL;
501 	}
502 
503 	pthread_spin_init(&dev->lock, PTHREAD_PROCESS_PRIVATE);
504 	dev->ibv_dev.ops = c4iw_dev_ops;
505 	dev->chip_version = hca_table[i].chip_version;
506 	dev->abi_version = abi_version;
507 
508 	PDBG("%s device claimed\n", __FUNCTION__);
509 	SLIST_INSERT_HEAD(&devices, dev, list);
510 #ifdef STALL_DETECTION
511 {
512 	char *c = getenv("CXGB4_STALL_TIMEOUT");
513 	if (c) {
514 		stall_to = strtol(c, NULL, 0);
515 		if (errno || stall_to < 0)
516 			stall_to = 0;
517 	}
518 }
519 #endif
520 {
521 	char *c = getenv("CXGB4_MA_WR");
522 	if (c) {
523 		ma_wr = strtol(c, NULL, 0);
524 		if (ma_wr != 1)
525 			ma_wr = 0;
526 	}
527 }
528 {
529 	char *c = getenv("T5_ENABLE_WC");
530 	if (c) {
531 		t5_en_wc = strtol(c, NULL, 0);
532 		if (t5_en_wc != 1)
533 			t5_en_wc = 0;
534 	}
535 }
536 
537 	return &dev->ibv_dev;
538 }
539 
cxgb4_register_driver(void)540 static __attribute__((constructor)) void cxgb4_register_driver(void)
541 {
542 	c4iw_page_size = sysconf(_SC_PAGESIZE);
543 	c4iw_page_shift = long_log2(c4iw_page_size);
544 	c4iw_page_mask = ~(c4iw_page_size - 1);
545 	ibv_register_driver("cxgb4", cxgb4_driver_init);
546 }
547 
548 #ifdef STATS
549 void __attribute__ ((destructor)) cs_fini(void);
cs_fini(void)550 void  __attribute__ ((destructor)) cs_fini(void)
551 {
552 	syslog(LOG_NOTICE, "cxgb4 stats - sends %lu recv %lu read %lu "
553 	       "write %lu arm %lu cqe %lu mr %lu qp %lu cq %lu\n",
554 	       c4iw_stats.send, c4iw_stats.recv, c4iw_stats.read,
555 	       c4iw_stats.write, c4iw_stats.arm, c4iw_stats.cqe,
556 	       c4iw_stats.mr, c4iw_stats.qp, c4iw_stats.cq);
557 }
558 #endif
559