xref: /freebsd-11-stable/sys/dev/mlx5/mlx5_core/mlx5_main.c (revision 4ffbeb0414ed5911a34fea0c148e001dfed29f52)
1 /*-
2  * Copyright (c) 2013-2019, Mellanox Technologies, Ltd.  All rights reserved.
3  *
4  * Redistribution and use in source and binary forms, with or without
5  * modification, are permitted provided that the following conditions
6  * are met:
7  * 1. Redistributions of source code must retain the above copyright
8  *    notice, this list of conditions and the following disclaimer.
9  * 2. Redistributions in binary form must reproduce the above copyright
10  *    notice, this list of conditions and the following disclaimer in the
11  *    documentation and/or other materials provided with the distribution.
12  *
13  * THIS SOFTWARE IS PROVIDED BY AUTHOR AND CONTRIBUTORS `AS IS' AND
14  * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
15  * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
16  * ARE DISCLAIMED.  IN NO EVENT SHALL AUTHOR OR CONTRIBUTORS BE LIABLE
17  * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
18  * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS
19  * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
20  * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
21  * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY
22  * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
23  * SUCH DAMAGE.
24  *
25  * $FreeBSD$
26  */
27 
28 #include <linux/kmod.h>
29 #include <linux/module.h>
30 #include <linux/errno.h>
31 #include <linux/pci.h>
32 #include <linux/dma-mapping.h>
33 #include <linux/slab.h>
34 #include <linux/io-mapping.h>
35 #include <linux/interrupt.h>
36 #include <linux/hardirq.h>
37 #include <dev/mlx5/driver.h>
38 #include <dev/mlx5/cq.h>
39 #include <dev/mlx5/qp.h>
40 #include <dev/mlx5/srq.h>
41 #include <dev/mlx5/mpfs.h>
42 #include <dev/mlx5/vport.h>
43 #include <linux/delay.h>
44 #include <dev/mlx5/mlx5_ifc.h>
45 #include <dev/mlx5/mlx5_fpga/core.h>
46 #include <dev/mlx5/mlx5_lib/mlx5.h>
47 #include "mlx5_core.h"
48 #include "eswitch.h"
49 #include "fs_core.h"
50 #ifdef PCI_IOV
51 #include <sys/nv.h>
52 #include <dev/pci/pci_iov.h>
53 #include <sys/iov_schema.h>
54 #endif
55 
56 static const char mlx5_version[] = "Mellanox Core driver "
57 	DRIVER_VERSION " (" DRIVER_RELDATE ")";
58 MODULE_AUTHOR("Eli Cohen <eli@mellanox.com>");
59 MODULE_DESCRIPTION("Mellanox Connect-IB, ConnectX-4 core driver");
60 MODULE_LICENSE("Dual BSD/GPL");
61 MODULE_DEPEND(mlx5, linuxkpi, 1, 1, 1);
62 MODULE_DEPEND(mlx5, mlxfw, 1, 1, 1);
63 MODULE_DEPEND(mlx5, firmware, 1, 1, 1);
64 MODULE_VERSION(mlx5, 1);
65 
66 SYSCTL_NODE(_hw, OID_AUTO, mlx5, CTLFLAG_RW, 0, "mlx5 hardware controls");
67 
68 int mlx5_core_debug_mask;
69 SYSCTL_INT(_hw_mlx5, OID_AUTO, debug_mask, CTLFLAG_RWTUN,
70     &mlx5_core_debug_mask, 0,
71     "debug mask: 1 = dump cmd data, 2 = dump cmd exec time, 3 = both. Default=0");
72 
73 #define MLX5_DEFAULT_PROF	2
74 static int mlx5_prof_sel = MLX5_DEFAULT_PROF;
75 SYSCTL_INT(_hw_mlx5, OID_AUTO, prof_sel, CTLFLAG_RWTUN,
76     &mlx5_prof_sel, 0,
77     "profile selector. Valid range 0 - 2");
78 
79 static int mlx5_fast_unload_enabled = 1;
80 SYSCTL_INT(_hw_mlx5, OID_AUTO, fast_unload_enabled, CTLFLAG_RWTUN,
81     &mlx5_fast_unload_enabled, 0,
82     "Set to enable fast unload. Clear to disable.");
83 
84 #define NUMA_NO_NODE       -1
85 
86 static LIST_HEAD(intf_list);
87 static LIST_HEAD(dev_list);
88 static DEFINE_MUTEX(intf_mutex);
89 
90 struct mlx5_device_context {
91 	struct list_head	list;
92 	struct mlx5_interface  *intf;
93 	void		       *context;
94 };
95 
/*
 * Atomic requestor endianness modes. handle_hca_cap_atomic() compares the
 * device capability against, and programs, the HOST_ENDIANNESS value.
 */
enum {
	MLX5_ATOMIC_REQ_MODE_BE			= 0x0,
	MLX5_ATOMIC_REQ_MODE_HOST_ENDIANNESS	= 0x1,
};
100 
101 static struct mlx5_profile profiles[] = {
102 	[0] = {
103 		.mask           = 0,
104 	},
105 	[1] = {
106 		.mask		= MLX5_PROF_MASK_QP_SIZE,
107 		.log_max_qp	= 12,
108 	},
109 	[2] = {
110 		.mask		= MLX5_PROF_MASK_QP_SIZE |
111 				  MLX5_PROF_MASK_MR_CACHE,
112 		.log_max_qp	= 17,
113 		.mr_cache[0]	= {
114 			.size	= 500,
115 			.limit	= 250
116 		},
117 		.mr_cache[1]	= {
118 			.size	= 500,
119 			.limit	= 250
120 		},
121 		.mr_cache[2]	= {
122 			.size	= 500,
123 			.limit	= 250
124 		},
125 		.mr_cache[3]	= {
126 			.size	= 500,
127 			.limit	= 250
128 		},
129 		.mr_cache[4]	= {
130 			.size	= 500,
131 			.limit	= 250
132 		},
133 		.mr_cache[5]	= {
134 			.size	= 500,
135 			.limit	= 250
136 		},
137 		.mr_cache[6]	= {
138 			.size	= 500,
139 			.limit	= 250
140 		},
141 		.mr_cache[7]	= {
142 			.size	= 500,
143 			.limit	= 250
144 		},
145 		.mr_cache[8]	= {
146 			.size	= 500,
147 			.limit	= 250
148 		},
149 		.mr_cache[9]	= {
150 			.size	= 500,
151 			.limit	= 250
152 		},
153 		.mr_cache[10]	= {
154 			.size	= 500,
155 			.limit	= 250
156 		},
157 		.mr_cache[11]	= {
158 			.size	= 500,
159 			.limit	= 250
160 		},
161 		.mr_cache[12]	= {
162 			.size	= 64,
163 			.limit	= 32
164 		},
165 		.mr_cache[13]	= {
166 			.size	= 32,
167 			.limit	= 16
168 		},
169 		.mr_cache[14]	= {
170 			.size	= 16,
171 			.limit	= 8
172 		},
173 	},
174 	[3] = {
175 		.mask		= MLX5_PROF_MASK_QP_SIZE,
176 		.log_max_qp	= 17,
177 	},
178 };
179 
#ifdef PCI_IOV
/*
 * Parameter names used only when PCI_IOV is enabled.
 * NOTE(review): presumably keys of the SR-IOV VF configuration schema;
 * the users are outside this part of the file - confirm.
 */
static const char iov_mac_addr_name[]  = "mac-addr";
static const char iov_node_guid_name[] = "node-guid";
static const char iov_port_guid_name[] = "port-guid";
#endif
185 
set_dma_caps(struct pci_dev * pdev)186 static int set_dma_caps(struct pci_dev *pdev)
187 {
188 	struct mlx5_core_dev *dev = pci_get_drvdata(pdev);
189 	int err;
190 
191 	err = pci_set_dma_mask(pdev, DMA_BIT_MASK(64));
192 	if (err) {
193 		mlx5_core_warn(dev, "couldn't set 64-bit PCI DMA mask\n");
194 		err = pci_set_dma_mask(pdev, DMA_BIT_MASK(32));
195 		if (err) {
196 			mlx5_core_err(dev, "Can't set PCI DMA mask, aborting\n");
197 			return err;
198 		}
199 	}
200 
201 	err = pci_set_consistent_dma_mask(pdev, DMA_BIT_MASK(64));
202 	if (err) {
203 		mlx5_core_warn(dev, "couldn't set 64-bit consistent PCI DMA mask\n");
204 		err = pci_set_consistent_dma_mask(pdev, DMA_BIT_MASK(32));
205 		if (err) {
206 			mlx5_core_err(dev, "Can't set consistent PCI DMA mask, aborting\n");
207 			return err;
208 		}
209 	}
210 
211 	dma_set_max_seg_size(&pdev->dev, 2u * 1024 * 1024 * 1024);
212 	return err;
213 }
214 
/*
 * Read the MPEIN register and extract the PCI power fields.
 *
 * @dev:      core device to query
 * @p_power:  receives the pci_power field
 * @p_status: receives the pwr_status field
 *
 * Returns the result of mlx5_core_access_reg(). Both outputs are stored
 * regardless of that result; the output buffer starts out zeroed.
 */
int mlx5_pci_read_power_status(struct mlx5_core_dev *dev,
			       u16 *p_power, u8 *p_status)
{
	u32 mpein_in[MLX5_ST_SZ_DW(mpein_reg)] = {};
	u32 mpein_out[MLX5_ST_SZ_DW(mpein_reg)] = {};
	int rc;

	rc = mlx5_core_access_reg(dev, mpein_in, sizeof(mpein_in),
	    mpein_out, sizeof(mpein_out),
	    MLX5_ACCESS_REG_SUMMARY_CTRL_ID_MPEIN, 0, 0);

	*p_status = MLX5_GET(mpein_reg, mpein_out, pwr_status);
	*p_power = MLX5_GET(mpein_reg, mpein_out, pci_power);

	return rc;
}
229 
mlx5_pci_enable_device(struct mlx5_core_dev * dev)230 static int mlx5_pci_enable_device(struct mlx5_core_dev *dev)
231 {
232 	struct pci_dev *pdev = dev->pdev;
233 	int err = 0;
234 
235 	mutex_lock(&dev->pci_status_mutex);
236 	if (dev->pci_status == MLX5_PCI_STATUS_DISABLED) {
237 		err = pci_enable_device(pdev);
238 		if (!err)
239 			dev->pci_status = MLX5_PCI_STATUS_ENABLED;
240 	}
241 	mutex_unlock(&dev->pci_status_mutex);
242 
243 	return err;
244 }
245 
mlx5_pci_disable_device(struct mlx5_core_dev * dev)246 static void mlx5_pci_disable_device(struct mlx5_core_dev *dev)
247 {
248 	struct pci_dev *pdev = dev->pdev;
249 
250 	mutex_lock(&dev->pci_status_mutex);
251 	if (dev->pci_status == MLX5_PCI_STATUS_ENABLED) {
252 		pci_disable_device(pdev);
253 		dev->pci_status = MLX5_PCI_STATUS_DISABLED;
254 	}
255 	mutex_unlock(&dev->pci_status_mutex);
256 }
257 
request_bar(struct pci_dev * pdev)258 static int request_bar(struct pci_dev *pdev)
259 {
260 	struct mlx5_core_dev *dev = pci_get_drvdata(pdev);
261 	int err = 0;
262 
263 	if (!(pci_resource_flags(pdev, 0) & IORESOURCE_MEM)) {
264 		mlx5_core_err(dev, "Missing registers BAR, aborting\n");
265 		return -ENODEV;
266 	}
267 
268 	err = pci_request_regions(pdev, DRIVER_NAME);
269 	if (err)
270 		mlx5_core_err(dev, "Couldn't get PCI resources, aborting\n");
271 
272 	return err;
273 }
274 
/* Release the PCI regions claimed by request_bar(). */
static void
release_bar(struct pci_dev *pdev)
{
	pci_release_regions(pdev);
}
279 
mlx5_enable_msix(struct mlx5_core_dev * dev)280 static int mlx5_enable_msix(struct mlx5_core_dev *dev)
281 {
282 	struct mlx5_priv *priv = &dev->priv;
283 	struct mlx5_eq_table *table = &priv->eq_table;
284 	int num_eqs = 1 << MLX5_CAP_GEN(dev, log_max_eq);
285 	int limit = dev->msix_eqvec;
286 	int nvec = MLX5_EQ_VEC_COMP_BASE;
287 	int i;
288 
289 	if (limit > 0)
290 		nvec += limit;
291 	else
292 		nvec += MLX5_CAP_GEN(dev, num_ports) * num_online_cpus();
293 
294 	if (nvec > num_eqs)
295 		nvec = num_eqs;
296 	if (nvec > 256)
297 		nvec = 256;	/* limit of firmware API */
298 	if (nvec <= MLX5_EQ_VEC_COMP_BASE)
299 		return -ENOMEM;
300 
301 	priv->msix_arr = kzalloc(nvec * sizeof(*priv->msix_arr), GFP_KERNEL);
302 
303 	for (i = 0; i < nvec; i++)
304 		priv->msix_arr[i].entry = i;
305 
306 	nvec = pci_enable_msix_range(dev->pdev, priv->msix_arr,
307 				     MLX5_EQ_VEC_COMP_BASE + 1, nvec);
308 	if (nvec < 0)
309 		return nvec;
310 
311 	table->num_comp_vectors = nvec - MLX5_EQ_VEC_COMP_BASE;
312 	return 0;
313 }
314 
mlx5_disable_msix(struct mlx5_core_dev * dev)315 static void mlx5_disable_msix(struct mlx5_core_dev *dev)
316 {
317 	struct mlx5_priv *priv = &dev->priv;
318 
319 	pci_disable_msix(dev->pdev);
320 	kfree(priv->msix_arr);
321 }
322 
323 struct mlx5_reg_host_endianess {
324 	u8	he;
325 	u8      rsvd[15];
326 };
327 
328 
/*
 * Build a 64-bit mask of `size` consecutive set bits starting at bit `pos`.
 * The shift is done in 64-bit arithmetic so that sizes of 31 and above do
 * not overflow an int. Valid for size < 64 and pos + size <= 64.
 */
#define CAP_MASK(pos, size) ((u64)((1ULL << (size)) - 1) << (pos))
330 
331 enum {
332 	MLX5_CAP_BITS_RW_MASK = CAP_MASK(MLX5_CAP_OFF_CMDIF_CSUM, 2) |
333 				MLX5_DEV_CAP_FLAG_DCT |
334 				MLX5_DEV_CAP_FLAG_DRAIN_SIGERR,
335 };
336 
to_fw_pkey_sz(struct mlx5_core_dev * dev,u32 size)337 static u16 to_fw_pkey_sz(struct mlx5_core_dev *dev, u32 size)
338 {
339 	switch (size) {
340 	case 128:
341 		return 0;
342 	case 256:
343 		return 1;
344 	case 512:
345 		return 2;
346 	case 1024:
347 		return 3;
348 	case 2048:
349 		return 4;
350 	case 4096:
351 		return 5;
352 	default:
353 		mlx5_core_warn(dev, "invalid pkey table size %d\n", size);
354 		return 0;
355 	}
356 }
357 
mlx5_core_get_caps_mode(struct mlx5_core_dev * dev,enum mlx5_cap_type cap_type,enum mlx5_cap_mode cap_mode)358 static int mlx5_core_get_caps_mode(struct mlx5_core_dev *dev,
359 				   enum mlx5_cap_type cap_type,
360 				   enum mlx5_cap_mode cap_mode)
361 {
362 	u8 in[MLX5_ST_SZ_BYTES(query_hca_cap_in)];
363 	int out_sz = MLX5_ST_SZ_BYTES(query_hca_cap_out);
364 	void *out, *hca_caps;
365 	u16 opmod = (cap_type << 1) | (cap_mode & 0x01);
366 	int err;
367 
368 	memset(in, 0, sizeof(in));
369 	out = kzalloc(out_sz, GFP_KERNEL);
370 
371 	MLX5_SET(query_hca_cap_in, in, opcode, MLX5_CMD_OP_QUERY_HCA_CAP);
372 	MLX5_SET(query_hca_cap_in, in, op_mod, opmod);
373 	err = mlx5_cmd_exec(dev, in, sizeof(in), out, out_sz);
374 	if (err) {
375 		mlx5_core_warn(dev,
376 			       "QUERY_HCA_CAP : type(%x) opmode(%x) Failed(%d)\n",
377 			       cap_type, cap_mode, err);
378 		goto query_ex;
379 	}
380 
381 	hca_caps =  MLX5_ADDR_OF(query_hca_cap_out, out, capability);
382 
383 	switch (cap_mode) {
384 	case HCA_CAP_OPMOD_GET_MAX:
385 		memcpy(dev->hca_caps_max[cap_type], hca_caps,
386 		       MLX5_UN_SZ_BYTES(hca_cap_union));
387 		break;
388 	case HCA_CAP_OPMOD_GET_CUR:
389 		memcpy(dev->hca_caps_cur[cap_type], hca_caps,
390 		       MLX5_UN_SZ_BYTES(hca_cap_union));
391 		break;
392 	default:
393 		mlx5_core_warn(dev,
394 			       "Tried to query dev cap type(%x) with wrong opmode(%x)\n",
395 			       cap_type, cap_mode);
396 		err = -EINVAL;
397 		break;
398 	}
399 query_ex:
400 	kfree(out);
401 	return err;
402 }
403 
mlx5_core_get_caps(struct mlx5_core_dev * dev,enum mlx5_cap_type cap_type)404 int mlx5_core_get_caps(struct mlx5_core_dev *dev, enum mlx5_cap_type cap_type)
405 {
406 	int ret;
407 
408 	ret = mlx5_core_get_caps_mode(dev, cap_type, HCA_CAP_OPMOD_GET_CUR);
409 	if (ret)
410 		return ret;
411 
412 	return mlx5_core_get_caps_mode(dev, cap_type, HCA_CAP_OPMOD_GET_MAX);
413 }
414 
set_caps(struct mlx5_core_dev * dev,void * in,int in_sz)415 static int set_caps(struct mlx5_core_dev *dev, void *in, int in_sz)
416 {
417 	u32 out[MLX5_ST_SZ_DW(set_hca_cap_out)] = {0};
418 
419 	MLX5_SET(set_hca_cap_in, in, opcode, MLX5_CMD_OP_SET_HCA_CAP);
420 
421 	return mlx5_cmd_exec(dev, in, in_sz, out, sizeof(out));
422 }
423 
handle_hca_cap(struct mlx5_core_dev * dev)424 static int handle_hca_cap(struct mlx5_core_dev *dev)
425 {
426 	void *set_ctx = NULL;
427 	struct mlx5_profile *prof = dev->profile;
428 	int err = -ENOMEM;
429 	int set_sz = MLX5_ST_SZ_BYTES(set_hca_cap_in);
430 	void *set_hca_cap;
431 
432 	set_ctx = kzalloc(set_sz, GFP_KERNEL);
433 
434 	err = mlx5_core_get_caps(dev, MLX5_CAP_GENERAL);
435 	if (err)
436 		goto query_ex;
437 
438 	set_hca_cap = MLX5_ADDR_OF(set_hca_cap_in, set_ctx,
439 				   capability);
440 	memcpy(set_hca_cap, dev->hca_caps_cur[MLX5_CAP_GENERAL],
441 	       MLX5_ST_SZ_BYTES(cmd_hca_cap));
442 
443 	mlx5_core_dbg(dev, "Current Pkey table size %d Setting new size %d\n",
444 		      mlx5_to_sw_pkey_sz(MLX5_CAP_GEN(dev, pkey_table_size)),
445 		      128);
446 	/* we limit the size of the pkey table to 128 entries for now */
447 	MLX5_SET(cmd_hca_cap, set_hca_cap, pkey_table_size,
448 		 to_fw_pkey_sz(dev, 128));
449 
450 	if (prof->mask & MLX5_PROF_MASK_QP_SIZE)
451 		MLX5_SET(cmd_hca_cap, set_hca_cap, log_max_qp,
452 			 prof->log_max_qp);
453 
454 	/* disable cmdif checksum */
455 	MLX5_SET(cmd_hca_cap, set_hca_cap, cmdif_checksum, 0);
456 
457 	/* enable drain sigerr */
458 	MLX5_SET(cmd_hca_cap, set_hca_cap, drain_sigerr, 1);
459 
460 	MLX5_SET(cmd_hca_cap, set_hca_cap, log_uar_page_sz, PAGE_SHIFT - 12);
461 
462 	err = set_caps(dev, set_ctx, set_sz);
463 
464 query_ex:
465 	kfree(set_ctx);
466 	return err;
467 }
468 
handle_hca_cap_atomic(struct mlx5_core_dev * dev)469 static int handle_hca_cap_atomic(struct mlx5_core_dev *dev)
470 {
471 	void *set_ctx;
472 	void *set_hca_cap;
473 	int set_sz = MLX5_ST_SZ_BYTES(set_hca_cap_in);
474 	int req_endianness;
475 	int err;
476 
477 	if (MLX5_CAP_GEN(dev, atomic)) {
478 		err = mlx5_core_get_caps(dev, MLX5_CAP_ATOMIC);
479 		if (err)
480 			return err;
481 	} else {
482 		return 0;
483 	}
484 
485 	req_endianness =
486 		MLX5_CAP_ATOMIC(dev,
487 				supported_atomic_req_8B_endianess_mode_1);
488 
489 	if (req_endianness != MLX5_ATOMIC_REQ_MODE_HOST_ENDIANNESS)
490 		return 0;
491 
492 	set_ctx = kzalloc(set_sz, GFP_KERNEL);
493 	if (!set_ctx)
494 		return -ENOMEM;
495 
496 	MLX5_SET(set_hca_cap_in, set_ctx, op_mod,
497 		 MLX5_SET_HCA_CAP_OP_MOD_ATOMIC << 1);
498 	set_hca_cap = MLX5_ADDR_OF(set_hca_cap_in, set_ctx, capability);
499 
500 	/* Set requestor to host endianness */
501 	MLX5_SET(atomic_caps, set_hca_cap, atomic_req_8B_endianess_mode,
502 		 MLX5_ATOMIC_REQ_MODE_HOST_ENDIANNESS);
503 
504 	err = set_caps(dev, set_ctx, set_sz);
505 
506 	kfree(set_ctx);
507 	return err;
508 }
509 
set_hca_ctrl(struct mlx5_core_dev * dev)510 static int set_hca_ctrl(struct mlx5_core_dev *dev)
511 {
512 	struct mlx5_reg_host_endianess he_in;
513 	struct mlx5_reg_host_endianess he_out;
514 	int err;
515 
516 	if (MLX5_CAP_GEN(dev, port_type) == MLX5_CAP_PORT_TYPE_ETH &&
517 	    !MLX5_CAP_GEN(dev, roce))
518 		return 0;
519 
520 	memset(&he_in, 0, sizeof(he_in));
521 	he_in.he = MLX5_SET_HOST_ENDIANNESS;
522 	err = mlx5_core_access_reg(dev, &he_in,  sizeof(he_in),
523 					&he_out, sizeof(he_out),
524 					MLX5_REG_HOST_ENDIANNESS, 0, 1);
525 	return err;
526 }
527 
/*
 * Issue the ENABLE_HCA command for the given function id.
 * Returns the result of mlx5_cmd_exec().
 */
static int mlx5_core_enable_hca(struct mlx5_core_dev *dev, u16 func_id)
{
	u32 cmd_in[MLX5_ST_SZ_DW(enable_hca_in)] = {0};
	u32 cmd_out[MLX5_ST_SZ_DW(enable_hca_out)] = {0};

	MLX5_SET(enable_hca_in, cmd_in, opcode, MLX5_CMD_OP_ENABLE_HCA);
	MLX5_SET(enable_hca_in, cmd_in, function_id, func_id);

	return mlx5_cmd_exec(dev, cmd_in, sizeof(cmd_in),
	    cmd_out, sizeof(cmd_out));
}
537 
mlx5_core_disable_hca(struct mlx5_core_dev * dev)538 static int mlx5_core_disable_hca(struct mlx5_core_dev *dev)
539 {
540 	u32 out[MLX5_ST_SZ_DW(disable_hca_out)] = {0};
541 	u32 in[MLX5_ST_SZ_DW(disable_hca_in)] = {0};
542 
543 	MLX5_SET(disable_hca_in, in, opcode, MLX5_CMD_OP_DISABLE_HCA);
544 	return mlx5_cmd_exec(dev, in, sizeof(in), out, sizeof(out));
545 }
546 
mlx5_core_set_issi(struct mlx5_core_dev * dev)547 static int mlx5_core_set_issi(struct mlx5_core_dev *dev)
548 {
549 	u32 query_in[MLX5_ST_SZ_DW(query_issi_in)] = {0};
550 	u32 query_out[MLX5_ST_SZ_DW(query_issi_out)] = {0};
551 	u32 sup_issi;
552 	int err;
553 
554 	MLX5_SET(query_issi_in, query_in, opcode, MLX5_CMD_OP_QUERY_ISSI);
555 
556 	err = mlx5_cmd_exec(dev, query_in, sizeof(query_in), query_out, sizeof(query_out));
557 	if (err) {
558 		u32 syndrome;
559 		u8 status;
560 
561 		mlx5_cmd_mbox_status(query_out, &status, &syndrome);
562 		if (status == MLX5_CMD_STAT_BAD_OP_ERR) {
563 			mlx5_core_dbg(dev, "Only ISSI 0 is supported\n");
564 			return 0;
565 		}
566 
567 		mlx5_core_err(dev, "failed to query ISSI\n");
568 		return err;
569 	}
570 
571 	sup_issi = MLX5_GET(query_issi_out, query_out, supported_issi_dw0);
572 
573 	if (sup_issi & (1 << 1)) {
574 		u32 set_in[MLX5_ST_SZ_DW(set_issi_in)]	 = {0};
575 		u32 set_out[MLX5_ST_SZ_DW(set_issi_out)] = {0};
576 
577 		MLX5_SET(set_issi_in, set_in, opcode, MLX5_CMD_OP_SET_ISSI);
578 		MLX5_SET(set_issi_in, set_in, current_issi, 1);
579 
580 		err = mlx5_cmd_exec(dev, set_in, sizeof(set_in), set_out, sizeof(set_out));
581 		if (err) {
582 			mlx5_core_err(dev, "failed to set ISSI=1 err(%d)\n", err);
583 			return err;
584 		}
585 
586 		dev->issi = 1;
587 
588 		return 0;
589 	} else if (sup_issi & (1 << 0)) {
590 		return 0;
591 	}
592 
593 	return -ENOTSUPP;
594 }
595 
596 
mlx5_vector2eqn(struct mlx5_core_dev * dev,int vector,int * eqn,int * irqn)597 int mlx5_vector2eqn(struct mlx5_core_dev *dev, int vector, int *eqn, int *irqn)
598 {
599 	struct mlx5_eq_table *table = &dev->priv.eq_table;
600 	struct mlx5_eq *eq;
601 	int err = -ENOENT;
602 
603 	spin_lock(&table->lock);
604 	list_for_each_entry(eq, &table->comp_eqs_list, list) {
605 		if (eq->index == vector) {
606 			*eqn = eq->eqn;
607 			*irqn = eq->irqn;
608 			err = 0;
609 			break;
610 		}
611 	}
612 	spin_unlock(&table->lock);
613 
614 	return err;
615 }
616 EXPORT_SYMBOL(mlx5_vector2eqn);
617 
free_comp_eqs(struct mlx5_core_dev * dev)618 static void free_comp_eqs(struct mlx5_core_dev *dev)
619 {
620 	struct mlx5_eq_table *table = &dev->priv.eq_table;
621 	struct mlx5_eq *eq, *n;
622 
623 	spin_lock(&table->lock);
624 	list_for_each_entry_safe(eq, n, &table->comp_eqs_list, list) {
625 		list_del(&eq->list);
626 		spin_unlock(&table->lock);
627 		if (mlx5_destroy_unmap_eq(dev, eq))
628 			mlx5_core_warn(dev, "failed to destroy EQ 0x%x\n",
629 				       eq->eqn);
630 		kfree(eq);
631 		spin_lock(&table->lock);
632 	}
633 	spin_unlock(&table->lock);
634 }
635 
alloc_comp_eqs(struct mlx5_core_dev * dev)636 static int alloc_comp_eqs(struct mlx5_core_dev *dev)
637 {
638 	struct mlx5_eq_table *table = &dev->priv.eq_table;
639 	struct mlx5_eq *eq;
640 	int ncomp_vec;
641 	int nent;
642 	int err;
643 	int i;
644 
645 	INIT_LIST_HEAD(&table->comp_eqs_list);
646 	ncomp_vec = table->num_comp_vectors;
647 	nent = MLX5_COMP_EQ_SIZE;
648 	for (i = 0; i < ncomp_vec; i++) {
649 		eq = kzalloc(sizeof(*eq), GFP_KERNEL);
650 
651 		err = mlx5_create_map_eq(dev, eq,
652 					 i + MLX5_EQ_VEC_COMP_BASE, nent, 0,
653 					 &dev->priv.uuari.uars[0]);
654 		if (err) {
655 			kfree(eq);
656 			goto clean;
657 		}
658 		mlx5_core_dbg(dev, "allocated completion EQN %d\n", eq->eqn);
659 		eq->index = i;
660 		spin_lock(&table->lock);
661 		list_add_tail(&eq->list, &table->comp_eqs_list);
662 		spin_unlock(&table->lock);
663 	}
664 
665 	return 0;
666 
667 clean:
668 	free_comp_eqs(dev);
669 	return err;
670 }
671 
map_bf_area(struct mlx5_core_dev * dev)672 static int map_bf_area(struct mlx5_core_dev *dev)
673 {
674 	resource_size_t bf_start = pci_resource_start(dev->pdev, 0);
675 	resource_size_t bf_len = pci_resource_len(dev->pdev, 0);
676 
677 	dev->priv.bf_mapping = io_mapping_create_wc(bf_start, bf_len);
678 
679 	return dev->priv.bf_mapping ? 0 : -ENOMEM;
680 }
681 
unmap_bf_area(struct mlx5_core_dev * dev)682 static void unmap_bf_area(struct mlx5_core_dev *dev)
683 {
684 	if (dev->priv.bf_mapping)
685 		io_mapping_free(dev->priv.bf_mapping);
686 }
687 
fw_initializing(struct mlx5_core_dev * dev)688 static inline int fw_initializing(struct mlx5_core_dev *dev)
689 {
690 	return ioread32be(&dev->iseg->initializing) >> 31;
691 }
692 
/*
 * Poll until the firmware clears its "initializing" bit.
 *
 * @max_wait_mili:  give up after this many milliseconds
 * @warn_time_mili: interval between progress warnings; 0 disables them
 *
 * The bit is sampled every FW_INIT_WAIT_MS milliseconds. Returns 0 once
 * the bit is clear, or -EBUSY on timeout, in which case the raw dword is
 * also logged at debug level.
 */
static int wait_fw_init(struct mlx5_core_dev *dev, u32 max_wait_mili,
			u32 warn_time_mili)
{
	int next_warn = jiffies + msecs_to_jiffies(warn_time_mili);
	int deadline = jiffies + msecs_to_jiffies(max_wait_mili);
	int rc = 0;

	MPASS(max_wait_mili > warn_time_mili);

	while (fw_initializing(dev) == 1) {
		if (time_after(jiffies, deadline)) {
			rc = -EBUSY;
			break;
		}
		if (warn_time_mili && time_after(jiffies, next_warn)) {
			mlx5_core_warn(dev,
			    "Waiting for FW initialization, timeout abort in %u s\n",
			    (unsigned)(jiffies_to_msecs(deadline - next_warn) / 1000));
			/* Schedule the next progress message. */
			next_warn = jiffies + msecs_to_jiffies(warn_time_mili);
		}
		msleep(FW_INIT_WAIT_MS);
	}

	if (rc != 0) {
		mlx5_core_dbg(dev, "Full initializing bit dword = 0x%x\n",
		    ioread32be(&dev->iseg->initializing));
	}

	return rc;
}
722 
mlx5_add_device(struct mlx5_interface * intf,struct mlx5_priv * priv)723 static void mlx5_add_device(struct mlx5_interface *intf, struct mlx5_priv *priv)
724 {
725 	struct mlx5_device_context *dev_ctx;
726 	struct mlx5_core_dev *dev = container_of(priv, struct mlx5_core_dev, priv);
727 
728 	dev_ctx = kzalloc(sizeof(*dev_ctx), GFP_KERNEL);
729 	if (!dev_ctx)
730 		return;
731 
732 	dev_ctx->intf    = intf;
733 	CURVNET_SET_QUIET(vnet0);
734 	dev_ctx->context = intf->add(dev);
735 	CURVNET_RESTORE();
736 
737 	if (dev_ctx->context) {
738 		spin_lock_irq(&priv->ctx_lock);
739 		list_add_tail(&dev_ctx->list, &priv->ctx_list);
740 		spin_unlock_irq(&priv->ctx_lock);
741 	} else {
742 		kfree(dev_ctx);
743 	}
744 }
745 
mlx5_remove_device(struct mlx5_interface * intf,struct mlx5_priv * priv)746 static void mlx5_remove_device(struct mlx5_interface *intf, struct mlx5_priv *priv)
747 {
748 	struct mlx5_device_context *dev_ctx;
749 	struct mlx5_core_dev *dev = container_of(priv, struct mlx5_core_dev, priv);
750 
751 	list_for_each_entry(dev_ctx, &priv->ctx_list, list)
752 		if (dev_ctx->intf == intf) {
753 			spin_lock_irq(&priv->ctx_lock);
754 			list_del(&dev_ctx->list);
755 			spin_unlock_irq(&priv->ctx_lock);
756 
757 			intf->remove(dev, dev_ctx->context);
758 			kfree(dev_ctx);
759 			return;
760 		}
761 }
762 
763 int
mlx5_register_device(struct mlx5_core_dev * dev)764 mlx5_register_device(struct mlx5_core_dev *dev)
765 {
766 	struct mlx5_priv *priv = &dev->priv;
767 	struct mlx5_interface *intf;
768 
769 	mutex_lock(&intf_mutex);
770 	list_add_tail(&priv->dev_list, &dev_list);
771 	list_for_each_entry(intf, &intf_list, list)
772 		mlx5_add_device(intf, priv);
773 	mutex_unlock(&intf_mutex);
774 
775 	return 0;
776 }
777 
778 void
mlx5_unregister_device(struct mlx5_core_dev * dev)779 mlx5_unregister_device(struct mlx5_core_dev *dev)
780 {
781 	struct mlx5_priv *priv = &dev->priv;
782 	struct mlx5_interface *intf;
783 
784 	mutex_lock(&intf_mutex);
785 	list_for_each_entry(intf, &intf_list, list)
786 		mlx5_remove_device(intf, priv);
787 	list_del(&priv->dev_list);
788 	mutex_unlock(&intf_mutex);
789 }
790 
mlx5_register_interface(struct mlx5_interface * intf)791 int mlx5_register_interface(struct mlx5_interface *intf)
792 {
793 	struct mlx5_priv *priv;
794 
795 	if (!intf->add || !intf->remove)
796 		return -EINVAL;
797 
798 	mutex_lock(&intf_mutex);
799 	list_add_tail(&intf->list, &intf_list);
800 	list_for_each_entry(priv, &dev_list, dev_list)
801 		mlx5_add_device(intf, priv);
802 	mutex_unlock(&intf_mutex);
803 
804 	return 0;
805 }
806 EXPORT_SYMBOL(mlx5_register_interface);
807 
mlx5_unregister_interface(struct mlx5_interface * intf)808 void mlx5_unregister_interface(struct mlx5_interface *intf)
809 {
810 	struct mlx5_priv *priv;
811 
812 	mutex_lock(&intf_mutex);
813 	list_for_each_entry(priv, &dev_list, dev_list)
814 		mlx5_remove_device(intf, priv);
815 	list_del(&intf->list);
816 	mutex_unlock(&intf_mutex);
817 }
818 EXPORT_SYMBOL(mlx5_unregister_interface);
819 
mlx5_get_protocol_dev(struct mlx5_core_dev * mdev,int protocol)820 void *mlx5_get_protocol_dev(struct mlx5_core_dev *mdev, int protocol)
821 {
822 	struct mlx5_priv *priv = &mdev->priv;
823 	struct mlx5_device_context *dev_ctx;
824 	unsigned long flags;
825 	void *result = NULL;
826 
827 	spin_lock_irqsave(&priv->ctx_lock, flags);
828 
829 	list_for_each_entry(dev_ctx, &mdev->priv.ctx_list, list)
830 		if ((dev_ctx->intf->protocol == protocol) &&
831 		    dev_ctx->intf->get_dev) {
832 			result = dev_ctx->intf->get_dev(dev_ctx->context);
833 			break;
834 		}
835 
836 	spin_unlock_irqrestore(&priv->ctx_lock, flags);
837 
838 	return result;
839 }
840 EXPORT_SYMBOL(mlx5_get_protocol_dev);
841 
842 static int mlx5_auto_fw_update;
843 SYSCTL_INT(_hw_mlx5, OID_AUTO, auto_fw_update, CTLFLAG_RDTUN | CTLFLAG_NOFETCH,
844     &mlx5_auto_fw_update, 0,
845     "Allow automatic firmware update on driver start");
846 static int
mlx5_firmware_update(struct mlx5_core_dev * dev)847 mlx5_firmware_update(struct mlx5_core_dev *dev)
848 {
849 	const struct firmware *fw;
850 	int err;
851 
852 	TUNABLE_INT_FETCH("hw.mlx5.auto_fw_update", &mlx5_auto_fw_update);
853 	if (!mlx5_auto_fw_update)
854 		return (0);
855 	fw = firmware_get("mlx5fw_mfa");
856 	if (fw) {
857 		err = mlx5_firmware_flash(dev, fw);
858 		firmware_put(fw, FIRMWARE_UNLOAD);
859 	}
860 	else
861 		return (-ENOENT);
862 
863 	return err;
864 }
865 
mlx5_pci_init(struct mlx5_core_dev * dev,struct mlx5_priv * priv)866 static int mlx5_pci_init(struct mlx5_core_dev *dev, struct mlx5_priv *priv)
867 {
868 	struct pci_dev *pdev = dev->pdev;
869 	device_t bsddev;
870 	int err;
871 
872 	pdev = dev->pdev;
873 	bsddev = pdev->dev.bsddev;
874 	pci_set_drvdata(dev->pdev, dev);
875 	strncpy(priv->name, dev_name(&pdev->dev), MLX5_MAX_NAME_LEN);
876 	priv->name[MLX5_MAX_NAME_LEN - 1] = 0;
877 
878 	mutex_init(&priv->pgdir_mutex);
879 	INIT_LIST_HEAD(&priv->pgdir_list);
880 	spin_lock_init(&priv->mkey_lock);
881 
882 	priv->numa_node = NUMA_NO_NODE;
883 
884 	err = mlx5_pci_enable_device(dev);
885 	if (err) {
886 		mlx5_core_err(dev, "Cannot enable PCI device, aborting\n");
887 		goto err_dbg;
888 	}
889 
890 	err = request_bar(pdev);
891 	if (err) {
892 		mlx5_core_err(dev, "error requesting BARs, aborting\n");
893 		goto err_disable;
894 	}
895 
896 	pci_set_master(pdev);
897 
898 	err = set_dma_caps(pdev);
899 	if (err) {
900 		mlx5_core_err(dev, "Failed setting DMA capabilities mask, aborting\n");
901 		goto err_clr_master;
902 	}
903 
904 	dev->iseg_base = pci_resource_start(dev->pdev, 0);
905 	dev->iseg = ioremap(dev->iseg_base, sizeof(*dev->iseg));
906 	if (!dev->iseg) {
907 		err = -ENOMEM;
908 		mlx5_core_err(dev, "Failed mapping initialization segment, aborting\n");
909 		goto err_clr_master;
910 	}
911 
912 	return 0;
913 
914 err_clr_master:
915 	release_bar(dev->pdev);
916 err_disable:
917 	mlx5_pci_disable_device(dev);
918 err_dbg:
919 	return err;
920 }
921 
mlx5_pci_close(struct mlx5_core_dev * dev,struct mlx5_priv * priv)922 static void mlx5_pci_close(struct mlx5_core_dev *dev, struct mlx5_priv *priv)
923 {
924 #ifdef PCI_IOV
925 	if (MLX5_CAP_GEN(dev, eswitch_flow_table))
926 		pci_iov_detach(dev->pdev->dev.bsddev);
927 #endif
928 	iounmap(dev->iseg);
929 	release_bar(dev->pdev);
930 	mlx5_pci_disable_device(dev);
931 }
932 
mlx5_init_once(struct mlx5_core_dev * dev,struct mlx5_priv * priv)933 static int mlx5_init_once(struct mlx5_core_dev *dev, struct mlx5_priv *priv)
934 {
935 	int err;
936 
937 	err = mlx5_vsc_find_cap(dev);
938 	if (err)
939 		mlx5_core_warn(dev, "Unable to find vendor specific capabilities\n");
940 
941 	err = mlx5_query_hca_caps(dev);
942 	if (err) {
943 		mlx5_core_err(dev, "query hca failed\n");
944 		goto out;
945 	}
946 
947 	err = mlx5_query_board_id(dev);
948 	if (err) {
949 		mlx5_core_err(dev, "query board id failed\n");
950 		goto out;
951 	}
952 
953 	err = mlx5_eq_init(dev);
954 	if (err) {
955 		mlx5_core_err(dev, "failed to initialize eq\n");
956 		goto out;
957 	}
958 
959 	MLX5_INIT_DOORBELL_LOCK(&priv->cq_uar_lock);
960 
961 	err = mlx5_init_cq_table(dev);
962 	if (err) {
963 		mlx5_core_err(dev, "failed to initialize cq table\n");
964 		goto err_eq_cleanup;
965 	}
966 
967 	mlx5_init_qp_table(dev);
968 	mlx5_init_srq_table(dev);
969 	mlx5_init_mr_table(dev);
970 
971 	mlx5_init_reserved_gids(dev);
972 	mlx5_fpga_init(dev);
973 
974 	return 0;
975 
976 err_eq_cleanup:
977 	mlx5_eq_cleanup(dev);
978 
979 out:
980 	return err;
981 }
982 
/*
 * Tear down the objects set up by mlx5_init_once(), in the reverse of
 * the order in which they were initialized.
 */
static void
mlx5_cleanup_once(struct mlx5_core_dev *dev)
{
	mlx5_fpga_cleanup(dev);
	mlx5_cleanup_reserved_gids(dev);

	/* Resource tables, then the EQ state they depend on. */
	mlx5_cleanup_mr_table(dev);
	mlx5_cleanup_srq_table(dev);
	mlx5_cleanup_qp_table(dev);
	mlx5_cleanup_cq_table(dev);
	mlx5_eq_cleanup(dev);
}
993 
/*
 * Bring the HCA up: wait for the firmware, initialize the command
 * interface, enable and initialize the HCA, then set up MSI-X, UARs, EQs,
 * flow steering, MPFS and FPGA, and finally register the device.
 *
 * dev  - core device to load
 * priv - private data of dev; its uuari member is allocated here
 * boot - when true the software objects are created with mlx5_init_once()
 *        (and destroyed with mlx5_cleanup_once() if a later step fails)
 *
 * The whole sequence runs with dev->intf_state_mutex held.  If the
 * MLX5_INTERFACE_STATE_UP bit is already set, nothing is done and 0 is
 * returned.  On success the bit is set and 0 is returned.  On failure the
 * completed steps are unwound in reverse order, dev->state is set to
 * MLX5_DEVICE_STATE_INTERNAL_ERROR and the error code of the failing step
 * is returned.
 */
static int mlx5_load_one(struct mlx5_core_dev *dev, struct mlx5_priv *priv,
			 bool boot)
{
	int err;

	mutex_lock(&dev->intf_state_mutex);
	if (test_bit(MLX5_INTERFACE_STATE_UP, &dev->intf_state)) {
		mlx5_core_warn(dev, "interface is up, NOP\n");
		goto out;
	}

	mlx5_core_dbg(dev, "firmware version: %d.%d.%d\n",
	    fw_rev_maj(dev), fw_rev_min(dev), fw_rev_sub(dev));

	/*
	 * On load removing any previous indication of internal error,
	 * device is up
	 */
	dev->state = MLX5_DEVICE_STATE_UP;

	/*
	 * Wait for firmware to accept initialization segments
	 * configurations.
	 */
	err = wait_fw_init(dev, FW_PRE_INIT_TIMEOUT_MILI,
	    FW_INIT_WARN_MESSAGE_INTERVAL);
	if (err) {
		dev_err(&dev->pdev->dev,
		    "Firmware over %d MS in pre-initializing state, aborting\n",
		    FW_PRE_INIT_TIMEOUT_MILI);
		goto out_err;
	}

	err = mlx5_cmd_init(dev);
	if (err) {
		mlx5_core_err(dev,
		    "Failed initializing command interface, aborting\n");
		goto out_err;
	}

	err = wait_fw_init(dev, FW_INIT_TIMEOUT_MILI, 0);
	if (err) {
		mlx5_core_err(dev,
		    "Firmware over %d MS in initializing state, aborting\n",
		    FW_INIT_TIMEOUT_MILI);
		goto err_cmd_cleanup;
	}

	err = mlx5_core_enable_hca(dev, 0);
	if (err) {
		mlx5_core_err(dev, "enable hca failed\n");
		goto err_cmd_cleanup;
	}

	err = mlx5_core_set_issi(dev);
	if (err) {
		mlx5_core_err(dev, "failed to set issi\n");
		goto err_disable_hca;
	}

	err = mlx5_pagealloc_start(dev);
	if (err) {
		mlx5_core_err(dev, "mlx5_pagealloc_start failed\n");
		goto err_disable_hca;
	}

	err = mlx5_satisfy_startup_pages(dev, 1);
	if (err) {
		mlx5_core_err(dev, "failed to allocate boot pages\n");
		goto err_pagealloc_stop;
	}

	err = set_hca_ctrl(dev);
	if (err) {
		mlx5_core_err(dev, "set_hca_ctrl failed\n");
		goto reclaim_boot_pages;
	}

	err = handle_hca_cap(dev);
	if (err) {
		mlx5_core_err(dev, "handle_hca_cap failed\n");
		goto reclaim_boot_pages;
	}

	err = handle_hca_cap_atomic(dev);
	if (err) {
		mlx5_core_err(dev, "handle_hca_cap_atomic failed\n");
		goto reclaim_boot_pages;
	}

	err = mlx5_satisfy_startup_pages(dev, 0);
	if (err) {
		mlx5_core_err(dev, "failed to allocate init pages\n");
		goto reclaim_boot_pages;
	}

	err = mlx5_cmd_init_hca(dev);
	if (err) {
		mlx5_core_err(dev, "init hca failed\n");
		goto reclaim_boot_pages;
	}

	mlx5_start_health_poll(dev);

	/*
	 * Record the result in err: the error path returns err, and without
	 * this assignment a failure here would be reported as success.
	 */
	if (boot) {
		err = mlx5_init_once(dev, priv);
		if (err) {
			mlx5_core_err(dev, "sw objs init failed\n");
			goto err_stop_poll;
		}
	}

	err = mlx5_enable_msix(dev);
	if (err) {
		mlx5_core_err(dev, "enable msix failed\n");
		goto err_cleanup_once;
	}

	err = mlx5_alloc_uuars(dev, &priv->uuari);
	if (err) {
		mlx5_core_err(dev, "Failed allocating uar, aborting\n");
		goto err_disable_msix;
	}

	err = mlx5_start_eqs(dev);
	if (err) {
		mlx5_core_err(dev, "Failed to start pages and async EQs\n");
		goto err_free_uar;
	}

	err = alloc_comp_eqs(dev);
	if (err) {
		mlx5_core_err(dev, "Failed to alloc completion EQs\n");
		goto err_stop_eqs;
	}

	/* A blue flame mapping failure is only logged; loading continues. */
	if (map_bf_area(dev))
		mlx5_core_err(dev, "Failed to map blue flame area\n");

	err = mlx5_init_fs(dev);
	if (err) {
		mlx5_core_err(dev, "flow steering init %d\n", err);
		goto err_free_comp_eqs;
	}

	err = mlx5_mpfs_init(dev);
	if (err) {
		mlx5_core_err(dev, "mpfs init failed %d\n", err);
		goto err_fs;
	}

	err = mlx5_fpga_device_start(dev);
	if (err) {
		mlx5_core_err(dev, "fpga device start failed %d\n", err);
		goto err_mpfs;
	}

	err = mlx5_register_device(dev);
	if (err) {
		mlx5_core_err(dev, "mlx5_register_device failed %d\n", err);
		goto err_fpga;
	}

	set_bit(MLX5_INTERFACE_STATE_UP, &dev->intf_state);

out:
	mutex_unlock(&dev->intf_state_mutex);
	return 0;

err_fpga:
	mlx5_fpga_device_stop(dev);

err_mpfs:
	mlx5_mpfs_destroy(dev);

err_fs:
	mlx5_cleanup_fs(dev);

err_free_comp_eqs:
	free_comp_eqs(dev);
	unmap_bf_area(dev);

err_stop_eqs:
	mlx5_stop_eqs(dev);

err_free_uar:
	mlx5_free_uuars(dev, &priv->uuari);

err_disable_msix:
	mlx5_disable_msix(dev);

err_cleanup_once:
	if (boot)
		mlx5_cleanup_once(dev);

err_stop_poll:
	mlx5_stop_health_poll(dev, boot);
	/* If the HCA cannot be torn down, the remaining cleanup is skipped. */
	if (mlx5_cmd_teardown_hca(dev)) {
		mlx5_core_err(dev, "tear_down_hca failed, skip cleanup\n");
		goto out_err;
	}

reclaim_boot_pages:
	mlx5_reclaim_startup_pages(dev);

err_pagealloc_stop:
	mlx5_pagealloc_stop(dev);

err_disable_hca:
	mlx5_core_disable_hca(dev);

err_cmd_cleanup:
	mlx5_cmd_cleanup(dev);

out_err:
	dev->state = MLX5_DEVICE_STATE_INTERNAL_ERROR;
	mutex_unlock(&dev->intf_state_mutex);

	return err;
}

1209 
/*
 * Tear down everything mlx5_load_one() set up.
 *
 * dev     - core device to unload
 * priv    - private data of dev; its uuari member is released here
 * cleanup - when true, health recovery is drained first and the software
 *           objects are destroyed with mlx5_cleanup_once()
 *
 * Runs with dev->intf_state_mutex held.  If the interface is not marked
 * MLX5_INTERFACE_STATE_UP only the optional mlx5_cleanup_once() is done.
 * The UP bit is cleared on every path.  Returns 0, or the error from
 * mlx5_cmd_teardown_hca(), in which case the page, HCA and command
 * interface cleanup is skipped.
 */
static int mlx5_unload_one(struct mlx5_core_dev *dev, struct mlx5_priv *priv,
			   bool cleanup)
{
	int ret = 0;

	if (cleanup)
		mlx5_drain_health_recovery(dev);

	mutex_lock(&dev->intf_state_mutex);
	if (!test_bit(MLX5_INTERFACE_STATE_UP, &dev->intf_state)) {
		mlx5_core_warn(dev, "%s: interface is down, NOP\n", __func__);
		if (cleanup)
			mlx5_cleanup_once(dev);
		goto unlock;
	}

	mlx5_unregister_device(dev);

	mlx5_eswitch_cleanup(dev->priv.eswitch);
	mlx5_fpga_device_stop(dev);
	mlx5_mpfs_destroy(dev);
	mlx5_cleanup_fs(dev);
	unmap_bf_area(dev);
	mlx5_wait_for_reclaim_vfs_pages(dev);
	free_comp_eqs(dev);
	mlx5_stop_eqs(dev);
	mlx5_free_uuars(dev, &priv->uuari);
	mlx5_disable_msix(dev);
	if (cleanup)
		mlx5_cleanup_once(dev);
	mlx5_stop_health_poll(dev, cleanup);

	ret = mlx5_cmd_teardown_hca(dev);
	if (ret == 0) {
		mlx5_pagealloc_stop(dev);
		mlx5_reclaim_startup_pages(dev);
		mlx5_core_disable_hca(dev);
		mlx5_cmd_cleanup(dev);
	} else {
		mlx5_core_err(dev, "tear_down_hca failed, skip cleanup\n");
	}

unlock:
	clear_bit(MLX5_INTERFACE_STATE_UP, &dev->intf_state);
	mutex_unlock(&dev->intf_state_mutex);
	return ret;
}

1256 
mlx5_core_event(struct mlx5_core_dev * dev,enum mlx5_dev_event event,unsigned long param)1257 void mlx5_core_event(struct mlx5_core_dev *dev, enum mlx5_dev_event event,
1258 		     unsigned long param)
1259 {
1260 	struct mlx5_priv *priv = &dev->priv;
1261 	struct mlx5_device_context *dev_ctx;
1262 	unsigned long flags;
1263 
1264 	spin_lock_irqsave(&priv->ctx_lock, flags);
1265 
1266 	list_for_each_entry(dev_ctx, &priv->ctx_list, list)
1267 		if (dev_ctx->intf->event)
1268 			dev_ctx->intf->event(dev, dev_ctx->context, event, param);
1269 
1270 	spin_unlock_irqrestore(&priv->ctx_lock, flags);
1271 }
1272 
/* Holder for a single core event callback. */
struct mlx5_core_event_handler {
	/* Invoked with the device, the event type and event-specific data. */
	void (*event)(struct mlx5_core_dev *dev, enum mlx5_dev_event event,
	    void *data);
};
1278 
/*
 * Picks the two string arguments (d and e) of one statistics entry and
 * expands to "d, e," so that entries can be listed in an array initializer.
 */
#define	MLX5_STATS_DESC(a, b, c, d, e, ...) d, e,

/*
 * Port module error statistics.  Each entry is passed to the macro "m" as
 * (increment, type, field, name string, description string).
 */
#define	MLX5_PORT_MODULE_ERROR_STATS(m)					\
m(+1, u64, power_budget_exceeded, "power_budget",			\
    "Module Power Budget Exceeded")					\
m(+1, u64, long_range, "long_range",					\
    "Module Long Range for non MLNX cable/module")			\
m(+1, u64, bus_stuck, "bus_stuck",					\
    "Module Bus stuck(I2C or data shorted)")				\
m(+1, u64, no_eeprom, "no_eeprom",					\
    "No EEPROM/retry timeout")						\
m(+1, u64, enforce_part_number, "enforce_part_number",			\
    "Module Enforce part number list")					\
m(+1, u64, unknown_id, "unknown_id",					\
    "Module Unknown identifier")					\
m(+1, u64, high_temp, "high_temp",					\
    "Module High Temperature")						\
m(+1, u64, cable_shorted, "cable_shorted",				\
    "Module Cable is shorted")						\
m(+1, u64, pmd_type_not_enabled, "pmd_type_not_enabled",		\
    "PMD type is not enabled")						\
m(+1, u64, laster_tec_failure, "laster_tec_failure",			\
    "Laster TEC failure")						\
m(+1, u64, high_current, "high_current",				\
    "High current")							\
m(+1, u64, high_voltage, "high_voltage",				\
    "High voltage")							\
m(+1, u64, pcie_sys_power_slot_exceeded, "pcie_sys_power_slot_exceeded", \
    "PCIe system power slot Exceeded")					\
m(+1, u64, high_power, "high_power",					\
    "High power")							\
m(+1, u64, module_state_machine_fault, "module_state_machine_fault",	\
    "Module State Machine fault")

/*
 * Flat array of (name, description) string pairs, one pair per entry of
 * MLX5_PORT_MODULE_ERROR_STATS: index 2 * i is the name and 2 * i + 1 the
 * description of entry i.
 */
static const char *mlx5_pme_err_desc[] = {
	MLX5_PORT_MODULE_ERROR_STATS(MLX5_STATS_DESC)
};
1301 
init_one(struct pci_dev * pdev,const struct pci_device_id * id)1302 static int init_one(struct pci_dev *pdev,
1303 		    const struct pci_device_id *id)
1304 {
1305 	struct mlx5_core_dev *dev;
1306 	struct mlx5_priv *priv;
1307 	device_t bsddev = pdev->dev.bsddev;
1308 #ifdef PCI_IOV
1309 	nvlist_t *pf_schema, *vf_schema;
1310 	int num_vfs, sriov_pos;
1311 #endif
1312 	int i,err;
1313 	struct sysctl_oid *pme_sysctl_node;
1314 	struct sysctl_oid *pme_err_sysctl_node;
1315 	struct sysctl_oid *cap_sysctl_node;
1316 	struct sysctl_oid *current_cap_sysctl_node;
1317 	struct sysctl_oid *max_cap_sysctl_node;
1318 
1319 	dev = kzalloc(sizeof(*dev), GFP_KERNEL);
1320 	priv = &dev->priv;
1321 	if (id)
1322 		priv->pci_dev_data = id->driver_data;
1323 
1324 	if (mlx5_prof_sel < 0 || mlx5_prof_sel >= ARRAY_SIZE(profiles)) {
1325 		device_printf(bsddev,
1326 		    "WARN: selected profile out of range, selecting default (%d)\n",
1327 		    MLX5_DEFAULT_PROF);
1328 		mlx5_prof_sel = MLX5_DEFAULT_PROF;
1329 	}
1330 	dev->profile = &profiles[mlx5_prof_sel];
1331 	dev->pdev = pdev;
1332 	dev->event = mlx5_core_event;
1333 
1334 	/* Set desc */
1335 	device_set_desc(bsddev, mlx5_version);
1336 
1337 	sysctl_ctx_init(&dev->sysctl_ctx);
1338 	SYSCTL_ADD_INT(&dev->sysctl_ctx,
1339 	    SYSCTL_CHILDREN(device_get_sysctl_tree(bsddev)),
1340 	    OID_AUTO, "msix_eqvec", CTLFLAG_RDTUN, &dev->msix_eqvec, 0,
1341 	    "Maximum number of MSIX event queue vectors, if set");
1342 	SYSCTL_ADD_INT(&dev->sysctl_ctx,
1343 	    SYSCTL_CHILDREN(device_get_sysctl_tree(bsddev)),
1344 	    OID_AUTO, "power_status", CTLFLAG_RD, &dev->pwr_status, 0,
1345 	    "0:Invalid 1:Sufficient 2:Insufficient");
1346 	SYSCTL_ADD_INT(&dev->sysctl_ctx,
1347 	    SYSCTL_CHILDREN(device_get_sysctl_tree(bsddev)),
1348 	    OID_AUTO, "power_value", CTLFLAG_RD, &dev->pwr_value, 0,
1349 	    "Current power value in Watts");
1350 
1351 	pme_sysctl_node = SYSCTL_ADD_NODE(&dev->sysctl_ctx,
1352 	    SYSCTL_CHILDREN(device_get_sysctl_tree(bsddev)),
1353 	    OID_AUTO, "pme_stats", CTLFLAG_RD, NULL,
1354 	    "Port module event statistics");
1355 	if (pme_sysctl_node == NULL) {
1356 		err = -ENOMEM;
1357 		goto clean_sysctl_ctx;
1358 	}
1359 	pme_err_sysctl_node = SYSCTL_ADD_NODE(&dev->sysctl_ctx,
1360 	    SYSCTL_CHILDREN(pme_sysctl_node),
1361 	    OID_AUTO, "errors", CTLFLAG_RD, NULL,
1362 	    "Port module event error statistics");
1363 	if (pme_err_sysctl_node == NULL) {
1364 		err = -ENOMEM;
1365 		goto clean_sysctl_ctx;
1366 	}
1367 	SYSCTL_ADD_U64(&dev->sysctl_ctx,
1368 	    SYSCTL_CHILDREN(pme_sysctl_node), OID_AUTO,
1369 	    "module_plug", CTLFLAG_RD | CTLFLAG_MPSAFE,
1370 	    &dev->priv.pme_stats.status_counters[MLX5_MODULE_STATUS_PLUGGED_ENABLED],
1371 	    0, "Number of time module plugged");
1372 	SYSCTL_ADD_U64(&dev->sysctl_ctx,
1373 	    SYSCTL_CHILDREN(pme_sysctl_node), OID_AUTO,
1374 	    "module_unplug", CTLFLAG_RD | CTLFLAG_MPSAFE,
1375 	    &dev->priv.pme_stats.status_counters[MLX5_MODULE_STATUS_UNPLUGGED],
1376 	    0, "Number of time module unplugged");
1377 	for (i = 0 ; i < MLX5_MODULE_EVENT_ERROR_NUM; i++) {
1378 		SYSCTL_ADD_U64(&dev->sysctl_ctx,
1379 		    SYSCTL_CHILDREN(pme_err_sysctl_node), OID_AUTO,
1380 		    mlx5_pme_err_desc[2 * i], CTLFLAG_RD | CTLFLAG_MPSAFE,
1381 		    &dev->priv.pme_stats.error_counters[i],
1382 		    0, mlx5_pme_err_desc[2 * i + 1]);
1383 	}
1384 
1385 	cap_sysctl_node = SYSCTL_ADD_NODE(&dev->sysctl_ctx,
1386 	    SYSCTL_CHILDREN(device_get_sysctl_tree(bsddev)),
1387 	    OID_AUTO, "caps", CTLFLAG_RD | CTLFLAG_MPSAFE, NULL,
1388 	    "hardware capabilities raw bitstrings");
1389 	if (cap_sysctl_node == NULL) {
1390 		err = -ENOMEM;
1391 		goto clean_sysctl_ctx;
1392 	}
1393 	current_cap_sysctl_node = SYSCTL_ADD_NODE(&dev->sysctl_ctx,
1394 	    SYSCTL_CHILDREN(cap_sysctl_node),
1395 	    OID_AUTO, "current", CTLFLAG_RD | CTLFLAG_MPSAFE, NULL,
1396 	    "");
1397 	if (current_cap_sysctl_node == NULL) {
1398 		err = -ENOMEM;
1399 		goto clean_sysctl_ctx;
1400 	}
1401 	max_cap_sysctl_node = SYSCTL_ADD_NODE(&dev->sysctl_ctx,
1402 	    SYSCTL_CHILDREN(cap_sysctl_node),
1403 	    OID_AUTO, "max", CTLFLAG_RD | CTLFLAG_MPSAFE, NULL,
1404 	    "");
1405 	if (max_cap_sysctl_node == NULL) {
1406 		err = -ENOMEM;
1407 		goto clean_sysctl_ctx;
1408 	}
1409 	SYSCTL_ADD_OPAQUE(&dev->sysctl_ctx,
1410 	    SYSCTL_CHILDREN(current_cap_sysctl_node),
1411 	    OID_AUTO, "general", CTLFLAG_RD | CTLFLAG_MPSAFE,
1412 	    &dev->hca_caps_cur[MLX5_CAP_GENERAL],
1413 	    MLX5_UN_SZ_DW(hca_cap_union) * sizeof(u32), "IU", "");
1414 	SYSCTL_ADD_OPAQUE(&dev->sysctl_ctx,
1415 	    SYSCTL_CHILDREN(max_cap_sysctl_node),
1416 	    OID_AUTO, "general", CTLFLAG_RD | CTLFLAG_MPSAFE,
1417 	    &dev->hca_caps_max[MLX5_CAP_GENERAL],
1418 	    MLX5_UN_SZ_DW(hca_cap_union) * sizeof(u32), "IU", "");
1419 	SYSCTL_ADD_OPAQUE(&dev->sysctl_ctx,
1420 	    SYSCTL_CHILDREN(current_cap_sysctl_node),
1421 	    OID_AUTO, "ether", CTLFLAG_RD | CTLFLAG_MPSAFE,
1422 	    &dev->hca_caps_cur[MLX5_CAP_ETHERNET_OFFLOADS],
1423 	    MLX5_UN_SZ_DW(hca_cap_union) * sizeof(u32), "IU", "");
1424 	SYSCTL_ADD_OPAQUE(&dev->sysctl_ctx,
1425 	    SYSCTL_CHILDREN(max_cap_sysctl_node),
1426 	    OID_AUTO, "ether", CTLFLAG_RD | CTLFLAG_MPSAFE,
1427 	    &dev->hca_caps_max[MLX5_CAP_ETHERNET_OFFLOADS],
1428 	    MLX5_UN_SZ_DW(hca_cap_union) * sizeof(u32), "IU", "");
1429 	SYSCTL_ADD_OPAQUE(&dev->sysctl_ctx,
1430 	    SYSCTL_CHILDREN(current_cap_sysctl_node),
1431 	    OID_AUTO, "odp", CTLFLAG_RD | CTLFLAG_MPSAFE,
1432 	    &dev->hca_caps_cur[MLX5_CAP_ODP],
1433 	    MLX5_UN_SZ_DW(hca_cap_union) * sizeof(u32), "IU", "");
1434 	SYSCTL_ADD_OPAQUE(&dev->sysctl_ctx,
1435 	    SYSCTL_CHILDREN(max_cap_sysctl_node),
1436 	    OID_AUTO, "odp", CTLFLAG_RD | CTLFLAG_MPSAFE,
1437 	    &dev->hca_caps_max[MLX5_CAP_ODP],
1438 	    MLX5_UN_SZ_DW(hca_cap_union) * sizeof(u32), "IU", "");
1439 	SYSCTL_ADD_OPAQUE(&dev->sysctl_ctx,
1440 	    SYSCTL_CHILDREN(current_cap_sysctl_node),
1441 	    OID_AUTO, "atomic", CTLFLAG_RD | CTLFLAG_MPSAFE,
1442 	    &dev->hca_caps_cur[MLX5_CAP_ATOMIC],
1443 	    MLX5_UN_SZ_DW(hca_cap_union) * sizeof(u32), "IU", "");
1444 	SYSCTL_ADD_OPAQUE(&dev->sysctl_ctx,
1445 	    SYSCTL_CHILDREN(max_cap_sysctl_node),
1446 	    OID_AUTO, "atomic", CTLFLAG_RD | CTLFLAG_MPSAFE,
1447 	    &dev->hca_caps_max[MLX5_CAP_ATOMIC],
1448 	    MLX5_UN_SZ_DW(hca_cap_union) * sizeof(u32), "IU", "");
1449 	SYSCTL_ADD_OPAQUE(&dev->sysctl_ctx,
1450 	    SYSCTL_CHILDREN(current_cap_sysctl_node),
1451 	    OID_AUTO, "roce", CTLFLAG_RD | CTLFLAG_MPSAFE,
1452 	    &dev->hca_caps_cur[MLX5_CAP_ROCE],
1453 	    MLX5_UN_SZ_DW(hca_cap_union) * sizeof(u32), "IU", "");
1454 	SYSCTL_ADD_OPAQUE(&dev->sysctl_ctx,
1455 	    SYSCTL_CHILDREN(max_cap_sysctl_node),
1456 	    OID_AUTO, "roce", CTLFLAG_RD | CTLFLAG_MPSAFE,
1457 	    &dev->hca_caps_max[MLX5_CAP_ROCE],
1458 	    MLX5_UN_SZ_DW(hca_cap_union) * sizeof(u32), "IU", "");
1459 	SYSCTL_ADD_OPAQUE(&dev->sysctl_ctx,
1460 	    SYSCTL_CHILDREN(current_cap_sysctl_node),
1461 	    OID_AUTO, "ipoib", CTLFLAG_RD | CTLFLAG_MPSAFE,
1462 	    &dev->hca_caps_cur[MLX5_CAP_IPOIB_OFFLOADS],
1463 	    MLX5_UN_SZ_DW(hca_cap_union) * sizeof(u32), "IU", "");
1464 	SYSCTL_ADD_OPAQUE(&dev->sysctl_ctx,
1465 	    SYSCTL_CHILDREN(max_cap_sysctl_node),
1466 	    OID_AUTO, "ipoib", CTLFLAG_RD | CTLFLAG_MPSAFE,
1467 	    &dev->hca_caps_max[MLX5_CAP_IPOIB_OFFLOADS],
1468 	    MLX5_UN_SZ_DW(hca_cap_union) * sizeof(u32), "IU", "");
1469 	SYSCTL_ADD_OPAQUE(&dev->sysctl_ctx,
1470 	    SYSCTL_CHILDREN(current_cap_sysctl_node),
1471 	    OID_AUTO, "eoib", CTLFLAG_RD | CTLFLAG_MPSAFE,
1472 	    &dev->hca_caps_cur[MLX5_CAP_EOIB_OFFLOADS],
1473 	    MLX5_UN_SZ_DW(hca_cap_union) * sizeof(u32), "IU", "");
1474 	SYSCTL_ADD_OPAQUE(&dev->sysctl_ctx,
1475 	    SYSCTL_CHILDREN(max_cap_sysctl_node),
1476 	    OID_AUTO, "eoib", CTLFLAG_RD | CTLFLAG_MPSAFE,
1477 	    &dev->hca_caps_max[MLX5_CAP_EOIB_OFFLOADS],
1478 	    MLX5_UN_SZ_DW(hca_cap_union) * sizeof(u32), "IU", "");
1479 	SYSCTL_ADD_OPAQUE(&dev->sysctl_ctx,
1480 	    SYSCTL_CHILDREN(current_cap_sysctl_node),
1481 	    OID_AUTO, "flow_table", CTLFLAG_RD | CTLFLAG_MPSAFE,
1482 	    &dev->hca_caps_cur[MLX5_CAP_FLOW_TABLE],
1483 	    MLX5_UN_SZ_DW(hca_cap_union) * sizeof(u32), "IU", "");
1484 	SYSCTL_ADD_OPAQUE(&dev->sysctl_ctx,
1485 	    SYSCTL_CHILDREN(max_cap_sysctl_node),
1486 	    OID_AUTO, "flow_table", CTLFLAG_RD | CTLFLAG_MPSAFE,
1487 	    &dev->hca_caps_max[MLX5_CAP_FLOW_TABLE],
1488 	    MLX5_UN_SZ_DW(hca_cap_union) * sizeof(u32), "IU", "");
1489 	SYSCTL_ADD_OPAQUE(&dev->sysctl_ctx,
1490 	    SYSCTL_CHILDREN(current_cap_sysctl_node),
1491 	    OID_AUTO, "eswitch_flow_table", CTLFLAG_RD | CTLFLAG_MPSAFE,
1492 	    &dev->hca_caps_cur[MLX5_CAP_ESWITCH_FLOW_TABLE],
1493 	    MLX5_UN_SZ_DW(hca_cap_union) * sizeof(u32), "IU", "");
1494 	SYSCTL_ADD_OPAQUE(&dev->sysctl_ctx,
1495 	    SYSCTL_CHILDREN(max_cap_sysctl_node),
1496 	    OID_AUTO, "eswitch_flow_table", CTLFLAG_RD | CTLFLAG_MPSAFE,
1497 	    &dev->hca_caps_max[MLX5_CAP_ESWITCH_FLOW_TABLE],
1498 	    MLX5_UN_SZ_DW(hca_cap_union) * sizeof(u32), "IU", "");
1499 	SYSCTL_ADD_OPAQUE(&dev->sysctl_ctx,
1500 	    SYSCTL_CHILDREN(current_cap_sysctl_node),
1501 	    OID_AUTO, "eswitch", CTLFLAG_RD | CTLFLAG_MPSAFE,
1502 	    &dev->hca_caps_cur[MLX5_CAP_ESWITCH],
1503 	    MLX5_UN_SZ_DW(hca_cap_union) * sizeof(u32), "IU", "");
1504 	SYSCTL_ADD_OPAQUE(&dev->sysctl_ctx,
1505 	    SYSCTL_CHILDREN(max_cap_sysctl_node),
1506 	    OID_AUTO, "eswitch", CTLFLAG_RD | CTLFLAG_MPSAFE,
1507 	    &dev->hca_caps_max[MLX5_CAP_ESWITCH],
1508 	    MLX5_UN_SZ_DW(hca_cap_union) * sizeof(u32), "IU", "");
1509 	SYSCTL_ADD_OPAQUE(&dev->sysctl_ctx,
1510 	    SYSCTL_CHILDREN(current_cap_sysctl_node),
1511 	    OID_AUTO, "snapshot", CTLFLAG_RD | CTLFLAG_MPSAFE,
1512 	    &dev->hca_caps_cur[MLX5_CAP_SNAPSHOT],
1513 	    MLX5_UN_SZ_DW(hca_cap_union) * sizeof(u32), "IU", "");
1514 	SYSCTL_ADD_OPAQUE(&dev->sysctl_ctx,
1515 	    SYSCTL_CHILDREN(max_cap_sysctl_node),
1516 	    OID_AUTO, "snapshot", CTLFLAG_RD | CTLFLAG_MPSAFE,
1517 	    &dev->hca_caps_max[MLX5_CAP_SNAPSHOT],
1518 	    MLX5_UN_SZ_DW(hca_cap_union) * sizeof(u32), "IU", "");
1519 	SYSCTL_ADD_OPAQUE(&dev->sysctl_ctx,
1520 	    SYSCTL_CHILDREN(current_cap_sysctl_node),
1521 	    OID_AUTO, "vector_calc", CTLFLAG_RD | CTLFLAG_MPSAFE,
1522 	    &dev->hca_caps_cur[MLX5_CAP_VECTOR_CALC],
1523 	    MLX5_UN_SZ_DW(hca_cap_union) * sizeof(u32), "IU", "");
1524 	SYSCTL_ADD_OPAQUE(&dev->sysctl_ctx,
1525 	    SYSCTL_CHILDREN(max_cap_sysctl_node),
1526 	    OID_AUTO, "vector_calc", CTLFLAG_RD | CTLFLAG_MPSAFE,
1527 	    &dev->hca_caps_max[MLX5_CAP_VECTOR_CALC],
1528 	    MLX5_UN_SZ_DW(hca_cap_union) * sizeof(u32), "IU", "");
1529 	SYSCTL_ADD_OPAQUE(&dev->sysctl_ctx,
1530 	    SYSCTL_CHILDREN(current_cap_sysctl_node),
1531 	    OID_AUTO, "qos", CTLFLAG_RD | CTLFLAG_MPSAFE,
1532 	    &dev->hca_caps_cur[MLX5_CAP_QOS],
1533 	    MLX5_UN_SZ_DW(hca_cap_union) * sizeof(u32), "IU", "");
1534 	SYSCTL_ADD_OPAQUE(&dev->sysctl_ctx,
1535 	    SYSCTL_CHILDREN(max_cap_sysctl_node),
1536 	    OID_AUTO, "qos", CTLFLAG_RD | CTLFLAG_MPSAFE,
1537 	    &dev->hca_caps_max[MLX5_CAP_QOS],
1538 	    MLX5_UN_SZ_DW(hca_cap_union) * sizeof(u32), "IU", "");
1539 	SYSCTL_ADD_OPAQUE(&dev->sysctl_ctx,
1540 	    SYSCTL_CHILDREN(current_cap_sysctl_node),
1541 	    OID_AUTO, "debug", CTLFLAG_RD | CTLFLAG_MPSAFE,
1542 	    &dev->hca_caps_cur[MLX5_CAP_DEBUG],
1543 	    MLX5_UN_SZ_DW(hca_cap_union) * sizeof(u32), "IU", "");
1544 	SYSCTL_ADD_OPAQUE(&dev->sysctl_ctx,
1545 	    SYSCTL_CHILDREN(max_cap_sysctl_node),
1546 	    OID_AUTO, "debug", CTLFLAG_RD | CTLFLAG_MPSAFE,
1547 	    &dev->hca_caps_max[MLX5_CAP_DEBUG],
1548 	    MLX5_UN_SZ_DW(hca_cap_union) * sizeof(u32), "IU", "");
1549 	SYSCTL_ADD_OPAQUE(&dev->sysctl_ctx,
1550 	    SYSCTL_CHILDREN(cap_sysctl_node),
1551 	    OID_AUTO, "pcam", CTLFLAG_RD | CTLFLAG_MPSAFE,
1552 	    &dev->caps.pcam, sizeof(dev->caps.pcam), "IU", "");
1553 	SYSCTL_ADD_OPAQUE(&dev->sysctl_ctx,
1554 	    SYSCTL_CHILDREN(cap_sysctl_node),
1555 	    OID_AUTO, "mcam", CTLFLAG_RD | CTLFLAG_MPSAFE,
1556 	    &dev->caps.mcam, sizeof(dev->caps.mcam), "IU", "");
1557 	SYSCTL_ADD_OPAQUE(&dev->sysctl_ctx,
1558 	    SYSCTL_CHILDREN(cap_sysctl_node),
1559 	    OID_AUTO, "qcam", CTLFLAG_RD | CTLFLAG_MPSAFE,
1560 	    &dev->caps.qcam, sizeof(dev->caps.qcam), "IU", "");
1561 	SYSCTL_ADD_OPAQUE(&dev->sysctl_ctx,
1562 	    SYSCTL_CHILDREN(cap_sysctl_node),
1563 	    OID_AUTO, "fpga", CTLFLAG_RD | CTLFLAG_MPSAFE,
1564 	    &dev->caps.fpga, sizeof(dev->caps.fpga), "IU", "");
1565 
1566 	INIT_LIST_HEAD(&priv->ctx_list);
1567 	spin_lock_init(&priv->ctx_lock);
1568 	mutex_init(&dev->pci_status_mutex);
1569 	mutex_init(&dev->intf_state_mutex);
1570 	mtx_init(&dev->dump_lock, "mlx5dmp", NULL, MTX_DEF | MTX_NEW);
1571 	err = mlx5_pci_init(dev, priv);
1572 	if (err) {
1573 		mlx5_core_err(dev, "mlx5_pci_init failed %d\n", err);
1574 		goto clean_dev;
1575 	}
1576 
1577 	err = mlx5_health_init(dev);
1578 	if (err) {
1579 		mlx5_core_err(dev, "mlx5_health_init failed %d\n", err);
1580 		goto close_pci;
1581 	}
1582 
1583 	mlx5_pagealloc_init(dev);
1584 
1585 	err = mlx5_load_one(dev, priv, true);
1586 	if (err) {
1587 		mlx5_core_err(dev, "mlx5_load_one failed %d\n", err);
1588 		goto clean_health;
1589 	}
1590 
1591 	mlx5_fwdump_prep(dev);
1592 
1593 	mlx5_firmware_update(dev);
1594 
1595 #ifdef PCI_IOV
1596 	if (MLX5_CAP_GEN(dev, vport_group_manager)) {
1597 		if (pci_find_extcap(bsddev, PCIZ_SRIOV, &sriov_pos) == 0) {
1598 			num_vfs = pci_read_config(bsddev, sriov_pos +
1599 			    PCIR_SRIOV_TOTAL_VFS, 2);
1600 		} else {
1601 			mlx5_core_info(dev, "cannot find SR-IOV PCIe cap\n");
1602 			num_vfs = 0;
1603 		}
1604 		err = mlx5_eswitch_init(dev, 1 + num_vfs);
1605 		if (err == 0) {
1606 			pf_schema = pci_iov_schema_alloc_node();
1607 			vf_schema = pci_iov_schema_alloc_node();
1608 			pci_iov_schema_add_unicast_mac(vf_schema,
1609 			    iov_mac_addr_name, 0, NULL);
1610 			pci_iov_schema_add_uint64(vf_schema, iov_node_guid_name,
1611 			    0, 0);
1612 			pci_iov_schema_add_uint64(vf_schema, iov_port_guid_name,
1613 			    0, 0);
1614 			err = pci_iov_attach(bsddev, pf_schema, vf_schema);
1615 			if (err != 0) {
1616 				device_printf(bsddev,
1617 			    "Failed to initialize SR-IOV support, error %d\n",
1618 				    err);
1619 			}
1620 		} else {
1621 			mlx5_core_err(dev, "eswitch init failed, error %d\n",
1622 			    err);
1623 		}
1624 	}
1625 #endif
1626 
1627 	pci_save_state(bsddev);
1628 	return 0;
1629 
1630 clean_health:
1631 	mlx5_pagealloc_cleanup(dev);
1632 	mlx5_health_cleanup(dev);
1633 close_pci:
1634 	mlx5_pci_close(dev, priv);
1635 clean_dev:
1636 	mtx_destroy(&dev->dump_lock);
1637 clean_sysctl_ctx:
1638 	sysctl_ctx_free(&dev->sysctl_ctx);
1639 	kfree(dev);
1640 	return err;
1641 }
1642 
remove_one(struct pci_dev * pdev)1643 static void remove_one(struct pci_dev *pdev)
1644 {
1645 	struct mlx5_core_dev *dev  = pci_get_drvdata(pdev);
1646 	struct mlx5_priv *priv = &dev->priv;
1647 
1648 #ifdef PCI_IOV
1649 	pci_iov_detach(pdev->dev.bsddev);
1650 	mlx5_eswitch_disable_sriov(priv->eswitch);
1651 #endif
1652 
1653 	if (mlx5_unload_one(dev, priv, true)) {
1654 		mlx5_core_err(dev, "mlx5_unload_one() failed, leaked %lld bytes\n",
1655 		    (long long)(dev->priv.fw_pages * MLX5_ADAPTER_PAGE_SIZE));
1656 	}
1657 
1658 	mlx5_pagealloc_cleanup(dev);
1659 	mlx5_health_cleanup(dev);
1660 	mlx5_fwdump_clean(dev);
1661 	mlx5_pci_close(dev, priv);
1662 	mtx_destroy(&dev->dump_lock);
1663 	pci_set_drvdata(pdev, NULL);
1664 	sysctl_ctx_free(&dev->sysctl_ctx);
1665 	kfree(dev);
1666 }
1667 
mlx5_pci_err_detected(struct pci_dev * pdev,pci_channel_state_t state)1668 static pci_ers_result_t mlx5_pci_err_detected(struct pci_dev *pdev,
1669 					      pci_channel_state_t state)
1670 {
1671 	struct mlx5_core_dev *dev = pci_get_drvdata(pdev);
1672 	struct mlx5_priv *priv = &dev->priv;
1673 
1674 	mlx5_core_info(dev, "%s was called\n", __func__);
1675 	mlx5_enter_error_state(dev, false);
1676 	mlx5_unload_one(dev, priv, false);
1677 
1678 	if (state) {
1679 		mlx5_drain_health_wq(dev);
1680 		mlx5_pci_disable_device(dev);
1681 	}
1682 
1683 	return state == pci_channel_io_perm_failure ?
1684 		PCI_ERS_RESULT_DISCONNECT : PCI_ERS_RESULT_NEED_RESET;
1685 }
1686 
mlx5_pci_slot_reset(struct pci_dev * pdev)1687 static pci_ers_result_t mlx5_pci_slot_reset(struct pci_dev *pdev)
1688 {
1689 	struct mlx5_core_dev *dev = pci_get_drvdata(pdev);
1690 	int err = 0;
1691 
1692 	mlx5_core_info(dev,"%s was called\n", __func__);
1693 
1694 	err = mlx5_pci_enable_device(dev);
1695 	if (err) {
1696 		mlx5_core_err(dev, "mlx5_pci_enable_device failed with error code: %d\n"
1697 			,err);
1698 		return PCI_ERS_RESULT_DISCONNECT;
1699 	}
1700 	pci_set_master(pdev);
1701 	pci_set_powerstate(pdev->dev.bsddev, PCI_POWERSTATE_D0);
1702 	pci_restore_state(pdev->dev.bsddev);
1703 	pci_save_state(pdev->dev.bsddev);
1704 
1705 	return err ? PCI_ERS_RESULT_DISCONNECT : PCI_ERS_RESULT_RECOVERED;
1706 }
1707 
1708 /* wait for the device to show vital signs. For now we check
1709  * that we can read the device ID and that the health buffer
1710  * shows a non zero value which is different than 0xffffffff
1711  */
wait_vital(struct pci_dev * pdev)1712 static void wait_vital(struct pci_dev *pdev)
1713 {
1714 	struct mlx5_core_dev *dev = pci_get_drvdata(pdev);
1715 	struct mlx5_core_health *health = &dev->priv.health;
1716 	const int niter = 100;
1717 	u32 count;
1718 	u16 did;
1719 	int i;
1720 
1721 	/* Wait for firmware to be ready after reset */
1722 	msleep(1000);
1723 	for (i = 0; i < niter; i++) {
1724 		if (pci_read_config_word(pdev, 2, &did)) {
1725 			mlx5_core_warn(dev, "failed reading config word\n");
1726 			break;
1727 		}
1728 		if (did == pdev->device) {
1729 			mlx5_core_info(dev,
1730 			    "device ID correctly read after %d iterations\n", i);
1731 			break;
1732 		}
1733 		msleep(50);
1734 	}
1735 	if (i == niter)
1736 		mlx5_core_warn(dev, "could not read device ID\n");
1737 
1738 	for (i = 0; i < niter; i++) {
1739 		count = ioread32be(health->health_counter);
1740 		if (count && count != 0xffffffff) {
1741 			mlx5_core_info(dev,
1742 			"Counter value 0x%x after %d iterations\n", count, i);
1743 			break;
1744 		}
1745 		msleep(50);
1746 	}
1747 
1748 	if (i == niter)
1749 		mlx5_core_warn(dev, "could not read device ID\n");
1750 }
1751 
mlx5_pci_resume(struct pci_dev * pdev)1752 static void mlx5_pci_resume(struct pci_dev *pdev)
1753 {
1754 	struct mlx5_core_dev *dev = pci_get_drvdata(pdev);
1755 	struct mlx5_priv *priv = &dev->priv;
1756 	int err;
1757 
1758 	mlx5_core_info(dev,"%s was called\n", __func__);
1759 
1760 	wait_vital(pdev);
1761 
1762 	err = mlx5_load_one(dev, priv, false);
1763 	if (err)
1764 		mlx5_core_err(dev,
1765 		    "mlx5_load_one failed with error code: %d\n" ,err);
1766 	else
1767 		mlx5_core_info(dev,"device recovered\n");
1768 }
1769 
1770 static const struct pci_error_handlers mlx5_err_handler = {
1771 	.error_detected = mlx5_pci_err_detected,
1772 	.slot_reset	= mlx5_pci_slot_reset,
1773 	.resume		= mlx5_pci_resume
1774 };
1775 
1776 #ifdef PCI_IOV
1777 static int
mlx5_iov_init(device_t dev,uint16_t num_vfs,const nvlist_t * pf_config)1778 mlx5_iov_init(device_t dev, uint16_t num_vfs, const nvlist_t *pf_config)
1779 {
1780 	struct pci_dev *pdev;
1781 	struct mlx5_core_dev *core_dev;
1782 	struct mlx5_priv *priv;
1783 	int err;
1784 
1785 	pdev = device_get_softc(dev);
1786 	core_dev = pci_get_drvdata(pdev);
1787 	priv = &core_dev->priv;
1788 
1789 	if (priv->eswitch == NULL)
1790 		return (ENXIO);
1791 	if (priv->eswitch->total_vports < num_vfs + 1)
1792 		num_vfs = priv->eswitch->total_vports - 1;
1793 	err = mlx5_eswitch_enable_sriov(priv->eswitch, num_vfs);
1794 	return (-err);
1795 }
1796 
1797 static void
mlx5_iov_uninit(device_t dev)1798 mlx5_iov_uninit(device_t dev)
1799 {
1800 	struct pci_dev *pdev;
1801 	struct mlx5_core_dev *core_dev;
1802 	struct mlx5_priv *priv;
1803 
1804 	pdev = device_get_softc(dev);
1805 	core_dev = pci_get_drvdata(pdev);
1806 	priv = &core_dev->priv;
1807 
1808 	mlx5_eswitch_disable_sriov(priv->eswitch);
1809 }
1810 
1811 static int
mlx5_iov_add_vf(device_t dev,uint16_t vfnum,const nvlist_t * vf_config)1812 mlx5_iov_add_vf(device_t dev, uint16_t vfnum, const nvlist_t *vf_config)
1813 {
1814 	struct pci_dev *pdev;
1815 	struct mlx5_core_dev *core_dev;
1816 	struct mlx5_priv *priv;
1817 	const void *mac;
1818 	size_t mac_size;
1819 	uint64_t node_guid, port_guid;
1820 	int error;
1821 
1822 	pdev = device_get_softc(dev);
1823 	core_dev = pci_get_drvdata(pdev);
1824 	priv = &core_dev->priv;
1825 
1826 	if (vfnum + 1 >= priv->eswitch->total_vports)
1827 		return (ENXIO);
1828 
1829 	if (nvlist_exists_binary(vf_config, iov_mac_addr_name)) {
1830 		mac = nvlist_get_binary(vf_config, iov_mac_addr_name,
1831 		    &mac_size);
1832 		error = -mlx5_eswitch_set_vport_mac(priv->eswitch,
1833 		    vfnum + 1, __DECONST(u8 *, mac));
1834 		if (error != 0) {
1835 			mlx5_core_err(core_dev,
1836 			    "setting MAC for VF %d failed, error %d\n",
1837 			    vfnum + 1, error);
1838 		}
1839 	}
1840 
1841 	if (nvlist_exists_number(vf_config, iov_node_guid_name)) {
1842 		node_guid = nvlist_get_number(vf_config, iov_node_guid_name);
1843 		error = -mlx5_modify_nic_vport_node_guid(core_dev, vfnum + 1,
1844 		    node_guid);
1845 		if (error != 0) {
1846 			mlx5_core_err(core_dev,
1847 		    "modifying node GUID for VF %d failed, error %d\n",
1848 			    vfnum + 1, error);
1849 		}
1850 	}
1851 
1852 	if (nvlist_exists_number(vf_config, iov_port_guid_name)) {
1853 		port_guid = nvlist_get_number(vf_config, iov_port_guid_name);
1854 		error = -mlx5_modify_nic_vport_port_guid(core_dev, vfnum + 1,
1855 		    port_guid);
1856 		if (error != 0) {
1857 			mlx5_core_err(core_dev,
1858 		    "modifying port GUID for VF %d failed, error %d\n",
1859 			    vfnum + 1, error);
1860 		}
1861 	}
1862 
1863 	error = -mlx5_eswitch_set_vport_state(priv->eswitch, vfnum + 1,
1864 	    VPORT_STATE_FOLLOW);
1865 	if (error != 0) {
1866 		mlx5_core_err(core_dev,
1867 		    "upping vport for VF %d failed, error %d\n",
1868 		    vfnum + 1, error);
1869 	}
1870 	error = -mlx5_core_enable_hca(core_dev, vfnum + 1);
1871 	if (error != 0) {
1872 		mlx5_core_err(core_dev, "enabling VF %d failed, error %d\n",
1873 		    vfnum + 1, error);
1874 	}
1875 	return (error);
1876 }
1877 #endif
1878 
mlx5_try_fast_unload(struct mlx5_core_dev * dev)1879 static int mlx5_try_fast_unload(struct mlx5_core_dev *dev)
1880 {
1881 	bool fast_teardown, force_teardown;
1882 	int err;
1883 
1884 	if (!mlx5_fast_unload_enabled) {
1885 		mlx5_core_dbg(dev, "fast unload is disabled by user\n");
1886 		return -EOPNOTSUPP;
1887 	}
1888 
1889 	fast_teardown = MLX5_CAP_GEN(dev, fast_teardown);
1890 	force_teardown = MLX5_CAP_GEN(dev, force_teardown);
1891 
1892 	mlx5_core_dbg(dev, "force teardown firmware support=%d\n", force_teardown);
1893 	mlx5_core_dbg(dev, "fast teardown firmware support=%d\n", fast_teardown);
1894 
1895 	if (!fast_teardown && !force_teardown)
1896 		return -EOPNOTSUPP;
1897 
1898 	if (dev->state == MLX5_DEVICE_STATE_INTERNAL_ERROR) {
1899 		mlx5_core_dbg(dev, "Device in internal error state, giving up\n");
1900 		return -EAGAIN;
1901 	}
1902 
1903 	/* Panic tear down fw command will stop the PCI bus communication
1904 	 * with the HCA, so the health polll is no longer needed.
1905 	 */
1906 	mlx5_drain_health_wq(dev);
1907 	mlx5_stop_health_poll(dev, false);
1908 
1909 	err = mlx5_cmd_fast_teardown_hca(dev);
1910 	if (!err)
1911 		goto done;
1912 
1913 	err = mlx5_cmd_force_teardown_hca(dev);
1914 	if (!err)
1915 		goto done;
1916 
1917 	mlx5_core_dbg(dev, "Firmware couldn't do fast unload error: %d\n", err);
1918 	mlx5_start_health_poll(dev);
1919 	return err;
1920 done:
1921 	mlx5_enter_error_state(dev, true);
1922 	return 0;
1923 }
1924 
mlx5_shutdown_disable_interrupts(struct mlx5_core_dev * mdev)1925 static void mlx5_shutdown_disable_interrupts(struct mlx5_core_dev *mdev)
1926 {
1927 	int nvec = mdev->priv.eq_table.num_comp_vectors + MLX5_EQ_VEC_COMP_BASE;
1928 	int x;
1929 
1930 	mdev->priv.disable_irqs = 1;
1931 
1932 	/* wait for all IRQ handlers to finish processing */
1933 	for (x = 0; x != nvec; x++)
1934 		synchronize_irq(mdev->priv.msix_arr[x].vector);
1935 }
1936 
shutdown_one(struct pci_dev * pdev)1937 static void shutdown_one(struct pci_dev *pdev)
1938 {
1939 	struct mlx5_core_dev *dev  = pci_get_drvdata(pdev);
1940 	struct mlx5_priv *priv = &dev->priv;
1941 	int err;
1942 
1943 	/* enter polling mode */
1944 	mlx5_cmd_use_polling(dev);
1945 
1946 	set_bit(MLX5_INTERFACE_STATE_TEARDOWN, &dev->intf_state);
1947 
1948 	/* disable all interrupts */
1949 	mlx5_shutdown_disable_interrupts(dev);
1950 
1951 	err = mlx5_try_fast_unload(dev);
1952 	if (err)
1953 	        mlx5_unload_one(dev, priv, false);
1954 	mlx5_pci_disable_device(dev);
1955 }
1956 
1957 static const struct pci_device_id mlx5_core_pci_table[] = {
1958 	{ PCI_VDEVICE(MELLANOX, 4113) }, /* Connect-IB */
1959 	{ PCI_VDEVICE(MELLANOX, 4114) }, /* Connect-IB VF */
1960 	{ PCI_VDEVICE(MELLANOX, 4115) }, /* ConnectX-4 */
1961 	{ PCI_VDEVICE(MELLANOX, 4116) }, /* ConnectX-4 VF */
1962 	{ PCI_VDEVICE(MELLANOX, 4117) }, /* ConnectX-4LX */
1963 	{ PCI_VDEVICE(MELLANOX, 4118) }, /* ConnectX-4LX VF */
1964 	{ PCI_VDEVICE(MELLANOX, 4119) }, /* ConnectX-5 */
1965 	{ PCI_VDEVICE(MELLANOX, 4120) }, /* ConnectX-5 VF */
1966 	{ PCI_VDEVICE(MELLANOX, 4121) },
1967 	{ PCI_VDEVICE(MELLANOX, 4122) },
1968 	{ PCI_VDEVICE(MELLANOX, 4123) },
1969 	{ PCI_VDEVICE(MELLANOX, 4124) },
1970 	{ PCI_VDEVICE(MELLANOX, 4125) },
1971 	{ PCI_VDEVICE(MELLANOX, 4126) },
1972 	{ PCI_VDEVICE(MELLANOX, 4127) },
1973 	{ PCI_VDEVICE(MELLANOX, 4128) },
1974 	{ PCI_VDEVICE(MELLANOX, 4129) },
1975 	{ PCI_VDEVICE(MELLANOX, 4130) },
1976 	{ PCI_VDEVICE(MELLANOX, 4131) },
1977 	{ PCI_VDEVICE(MELLANOX, 4132) },
1978 	{ PCI_VDEVICE(MELLANOX, 4133) },
1979 	{ PCI_VDEVICE(MELLANOX, 4134) },
1980 	{ PCI_VDEVICE(MELLANOX, 4135) },
1981 	{ PCI_VDEVICE(MELLANOX, 4136) },
1982 	{ PCI_VDEVICE(MELLANOX, 4137) },
1983 	{ PCI_VDEVICE(MELLANOX, 4138) },
1984 	{ PCI_VDEVICE(MELLANOX, 4139) },
1985 	{ PCI_VDEVICE(MELLANOX, 4140) },
1986 	{ PCI_VDEVICE(MELLANOX, 4141) },
1987 	{ PCI_VDEVICE(MELLANOX, 4142) },
1988 	{ PCI_VDEVICE(MELLANOX, 4143) },
1989 	{ PCI_VDEVICE(MELLANOX, 4144) },
1990 	{ 0, }
1991 };
1992 
1993 MODULE_DEVICE_TABLE(pci, mlx5_core_pci_table);
1994 
mlx5_disable_device(struct mlx5_core_dev * dev)1995 void mlx5_disable_device(struct mlx5_core_dev *dev)
1996 {
1997 	mlx5_pci_err_detected(dev->pdev, 0);
1998 }
1999 
mlx5_recover_device(struct mlx5_core_dev * dev)2000 void mlx5_recover_device(struct mlx5_core_dev *dev)
2001 {
2002 	mlx5_pci_disable_device(dev);
2003 	if (mlx5_pci_slot_reset(dev->pdev) == PCI_ERS_RESULT_RECOVERED)
2004 		mlx5_pci_resume(dev->pdev);
2005 }
2006 
2007 struct pci_driver mlx5_core_driver = {
2008 	.name           = DRIVER_NAME,
2009 	.id_table       = mlx5_core_pci_table,
2010 	.shutdown	= shutdown_one,
2011 	.probe          = init_one,
2012 	.remove         = remove_one,
2013 	.err_handler	= &mlx5_err_handler,
2014 #ifdef PCI_IOV
2015 	.bsd_iov_init	= mlx5_iov_init,
2016 	.bsd_iov_uninit	= mlx5_iov_uninit,
2017 	.bsd_iov_add_vf	= mlx5_iov_add_vf,
2018 #endif
2019 };
2020 
init(void)2021 static int __init init(void)
2022 {
2023 	int err;
2024 
2025 	err = pci_register_driver(&mlx5_core_driver);
2026 	if (err)
2027 		goto err_debug;
2028 
2029 	err = mlx5_ctl_init();
2030 	if (err)
2031 		goto err_ctl;
2032 
2033  	return 0;
2034 
2035 err_ctl:
2036 	pci_unregister_driver(&mlx5_core_driver);
2037 
2038 err_debug:
2039 	return err;
2040 }
2041 
cleanup(void)2042 static void __exit cleanup(void)
2043 {
2044 	mlx5_ctl_fini();
2045 	pci_unregister_driver(&mlx5_core_driver);
2046 }
2047 
/* Register init() and cleanup() as the module hooks, both at SI_ORDER_FIRST. */
module_init_order(init, SI_ORDER_FIRST);
module_exit_order(cleanup, SI_ORDER_FIRST);
2050