1 /*
2  * Copyright 2008 Advanced Micro Devices, Inc.
3  * Copyright 2008 Red Hat Inc.
4  * Copyright 2009 Jerome Glisse.
5  *
6  * Permission is hereby granted, free of charge, to any person obtaining a
7  * copy of this software and associated documentation files (the "Software"),
8  * to deal in the Software without restriction, including without limitation
9  * the rights to use, copy, modify, merge, publish, distribute, sublicense,
10  * and/or sell copies of the Software, and to permit persons to whom the
11  * Software is furnished to do so, subject to the following conditions:
12  *
13  * The above copyright notice and this permission notice shall be included in
14  * all copies or substantial portions of the Software.
15  *
16  * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
17  * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
18  * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT.  IN NO EVENT SHALL
19  * THE COPYRIGHT HOLDER(S) OR AUTHOR(S) BE LIABLE FOR ANY CLAIM, DAMAGES OR
20  * OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE,
21  * ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR
22  * OTHER DEALINGS IN THE SOFTWARE.
23  *
24  * Authors: Dave Airlie
25  *          Alex Deucher
26  *          Jerome Glisse
27  */
28 #include <linux/power_supply.h>
29 #include <linux/kthread.h>
30 #include <linux/module.h>
31 #include <linux/console.h>
32 #include <linux/slab.h>
33 #include <linux/iommu.h>
34 #include <linux/pci.h>
35 #include <linux/pci-p2pdma.h>
36 #include <linux/apple-gmux.h>
37 
38 #include <drm/drm_aperture.h>
39 #include <drm/drm_atomic_helper.h>
40 #include <drm/drm_crtc_helper.h>
41 #include <drm/drm_fb_helper.h>
42 #include <drm/drm_probe_helper.h>
43 #include <drm/amdgpu_drm.h>
44 #include <linux/device.h>
45 #include <linux/vgaarb.h>
46 #include <linux/vga_switcheroo.h>
47 #include <linux/efi.h>
48 #include "amdgpu.h"
49 #include "amdgpu_trace.h"
50 #include "amdgpu_i2c.h"
51 #include "atom.h"
52 #include "amdgpu_atombios.h"
53 #include "amdgpu_atomfirmware.h"
54 #include "amd_pcie.h"
55 #ifdef CONFIG_DRM_AMDGPU_SI
56 #include "si.h"
57 #endif
58 #ifdef CONFIG_DRM_AMDGPU_CIK
59 #include "cik.h"
60 #endif
61 #include "vi.h"
62 #include "soc15.h"
63 #include "nv.h"
64 #include "bif/bif_4_1_d.h"
65 #include <linux/firmware.h>
66 #include "amdgpu_vf_error.h"
67 
68 #include "amdgpu_amdkfd.h"
69 #include "amdgpu_pm.h"
70 
71 #include "amdgpu_xgmi.h"
72 #include "amdgpu_ras.h"
73 #include "amdgpu_pmu.h"
74 #include "amdgpu_fru_eeprom.h"
75 #include "amdgpu_reset.h"
76 #include "amdgpu_virt.h"
77 #include "amdgpu_dev_coredump.h"
78 
79 #include <linux/suspend.h>
80 #include <drm/task_barrier.h>
81 #include <linux/pm_runtime.h>
82 
83 #include <drm/drm_drv.h>
84 
85 #if IS_ENABLED(CONFIG_X86) && defined(__linux__)
86 #include <asm/intel-family.h>
87 #endif
88 
/*
 * Firmware files announced through MODULE_FIRMWARE(): the "gpu_info"
 * images for the ASICs named below, all under the "amdgpu/" directory.
 */
89 MODULE_FIRMWARE("amdgpu/vega10_gpu_info.bin");
90 MODULE_FIRMWARE("amdgpu/vega12_gpu_info.bin");
91 MODULE_FIRMWARE("amdgpu/raven_gpu_info.bin");
92 MODULE_FIRMWARE("amdgpu/picasso_gpu_info.bin");
93 MODULE_FIRMWARE("amdgpu/raven2_gpu_info.bin");
94 MODULE_FIRMWARE("amdgpu/arcturus_gpu_info.bin");
95 MODULE_FIRMWARE("amdgpu/navi12_gpu_info.bin");
96 
/* NOTE(review): presumably a resume timeout in milliseconds; users are not in this part of the file. */
97 #define AMDGPU_RESUME_MS		2000
/* NOTE(review): presumably the retry cap paired with AMDGPU_RETRY_SRIOV_RESET(); confirm at the call site. */
98 #define AMDGPU_MAX_RETRY_LIMIT		2
/* True when the error code @r is -EBUSY, -ETIMEDOUT or -EINVAL. */
99 #define AMDGPU_RETRY_SRIOV_RESET(r) ((r) == -EBUSY || (r) == -ETIMEDOUT || (r) == -EINVAL)
/*
 * Fallback dword indices (byte offsets 0x38, 0x44 and 0x3C shifted right by
 * two) for the PCIE index, index-high and data registers.
 * amdgpu_device_indirect_rreg_ext() uses them when adev->nbio.funcs is NULL.
 */
100 #define AMDGPU_PCIE_INDEX_FALLBACK (0x38 >> 2)
101 #define AMDGPU_PCIE_INDEX_HI_FALLBACK (0x44 >> 2)
102 #define AMDGPU_PCIE_DATA_FALLBACK (0x3C >> 2)
103 
/* Declaration of the DRM driver descriptor; its initializer is not visible in this part of the file. */
104 static const struct drm_driver amdgpu_kms_driver;
105 
/*
 * Human-readable ASIC names, terminated by a "LAST" entry.
 * NOTE(review): presumably indexed by the ASIC type enum, in which case the
 * order here must match that enum exactly; confirm against its definition.
 */
106 const char *amdgpu_asic_name[] = {
107 	"TAHITI",
108 	"PITCAIRN",
109 	"VERDE",
110 	"OLAND",
111 	"HAINAN",
112 	"BONAIRE",
113 	"KAVERI",
114 	"KABINI",
115 	"HAWAII",
116 	"MULLINS",
117 	"TOPAZ",
118 	"TONGA",
119 	"FIJI",
120 	"CARRIZO",
121 	"STONEY",
122 	"POLARIS10",
123 	"POLARIS11",
124 	"POLARIS12",
125 	"VEGAM",
126 	"VEGA10",
127 	"VEGA12",
128 	"VEGA20",
129 	"RAVEN",
130 	"ARCTURUS",
131 	"RENOIR",
132 	"ALDEBARAN",
133 	"NAVI10",
134 	"CYAN_SKILLFISH",
135 	"NAVI14",
136 	"NAVI12",
137 	"SIENNA_CICHLID",
138 	"NAVY_FLOUNDER",
139 	"VANGOGH",
140 	"DIMGREY_CAVEFISH",
141 	"BEIGE_GOBY",
142 	"YELLOW_CARP",
143 	"IP DISCOVERY",
144 	"LAST",
145 };
146 
/* Forward declaration; the definition is not visible in this part of the file. */
147 static inline void amdgpu_device_stop_pending_resets(struct amdgpu_device *adev);
148 
149 /**
150  * DOC: pcie_replay_count
151  *
152  * The amdgpu driver provides a sysfs API for reporting the total number
153  * of PCIe replays (NAKs).
154  * The file pcie_replay_count is used for this and returns the total
155  * number of replays as a sum of the NAKs generated and NAKs received
156  */
157 
amdgpu_device_get_pcie_replay_count(struct device * dev,struct device_attribute * attr,char * buf)158 static ssize_t amdgpu_device_get_pcie_replay_count(struct device *dev,
159 		struct device_attribute *attr, char *buf)
160 {
161 	struct drm_device *ddev = dev_get_drvdata(dev);
162 	struct amdgpu_device *adev = drm_to_adev(ddev);
163 	uint64_t cnt = amdgpu_asic_get_pcie_replay_count(adev);
164 
165 	return sysfs_emit(buf, "%llu\n", cnt);
166 }
167 
168 static DEVICE_ATTR(pcie_replay_count, 0444,
169 		amdgpu_device_get_pcie_replay_count, NULL);
170 
171 #ifdef __linux__
172 
amdgpu_sysfs_reg_state_get(struct file * f,struct kobject * kobj,struct bin_attribute * attr,char * buf,loff_t ppos,size_t count)173 static ssize_t amdgpu_sysfs_reg_state_get(struct file *f, struct kobject *kobj,
174 					  struct bin_attribute *attr, char *buf,
175 					  loff_t ppos, size_t count)
176 {
177 	struct device *dev = kobj_to_dev(kobj);
178 	struct drm_device *ddev = dev_get_drvdata(dev);
179 	struct amdgpu_device *adev = drm_to_adev(ddev);
180 	ssize_t bytes_read;
181 
182 	switch (ppos) {
183 	case AMDGPU_SYS_REG_STATE_XGMI:
184 		bytes_read = amdgpu_asic_get_reg_state(
185 			adev, AMDGPU_REG_STATE_TYPE_XGMI, buf, count);
186 		break;
187 	case AMDGPU_SYS_REG_STATE_WAFL:
188 		bytes_read = amdgpu_asic_get_reg_state(
189 			adev, AMDGPU_REG_STATE_TYPE_WAFL, buf, count);
190 		break;
191 	case AMDGPU_SYS_REG_STATE_PCIE:
192 		bytes_read = amdgpu_asic_get_reg_state(
193 			adev, AMDGPU_REG_STATE_TYPE_PCIE, buf, count);
194 		break;
195 	case AMDGPU_SYS_REG_STATE_USR:
196 		bytes_read = amdgpu_asic_get_reg_state(
197 			adev, AMDGPU_REG_STATE_TYPE_USR, buf, count);
198 		break;
199 	case AMDGPU_SYS_REG_STATE_USR_1:
200 		bytes_read = amdgpu_asic_get_reg_state(
201 			adev, AMDGPU_REG_STATE_TYPE_USR_1, buf, count);
202 		break;
203 	default:
204 		return -EINVAL;
205 	}
206 
207 	return bytes_read;
208 }
209 
210 BIN_ATTR(reg_state, 0444, amdgpu_sysfs_reg_state_get, NULL,
211 	 AMDGPU_SYS_REG_STATE_END);
212 
213 #endif /* __linux__ */
214 
amdgpu_reg_state_sysfs_init(struct amdgpu_device * adev)215 int amdgpu_reg_state_sysfs_init(struct amdgpu_device *adev)
216 {
217 	int ret;
218 
219 	if (!amdgpu_asic_get_reg_state_supported(adev))
220 		return 0;
221 
222 	ret = sysfs_create_bin_file(&adev->dev->kobj, &bin_attr_reg_state);
223 
224 	return ret;
225 }
226 
amdgpu_reg_state_sysfs_fini(struct amdgpu_device * adev)227 void amdgpu_reg_state_sysfs_fini(struct amdgpu_device *adev)
228 {
229 	if (!amdgpu_asic_get_reg_state_supported(adev))
230 		return;
231 	sysfs_remove_bin_file(&adev->dev->kobj, &bin_attr_reg_state);
232 }
233 
234 /**
235  * DOC: board_info
236  *
237  * The amdgpu driver provides a sysfs API for giving board related information.
238  * It provides the form factor information in the format
239  *
240  *   type : form factor
241  *
242  * Possible form factor values
243  *
244  * - "cem"		- PCIE CEM card
245  * - "oam"		- Open Compute Accelerator Module
246  * - "unknown"	- Not known
247  *
248  */
249 
amdgpu_device_get_board_info(struct device * dev,struct device_attribute * attr,char * buf)250 static ssize_t amdgpu_device_get_board_info(struct device *dev,
251 					    struct device_attribute *attr,
252 					    char *buf)
253 {
254 	struct drm_device *ddev = dev_get_drvdata(dev);
255 	struct amdgpu_device *adev = drm_to_adev(ddev);
256 	enum amdgpu_pkg_type pkg_type = AMDGPU_PKG_TYPE_CEM;
257 	const char *pkg;
258 
259 	if (adev->smuio.funcs && adev->smuio.funcs->get_pkg_type)
260 		pkg_type = adev->smuio.funcs->get_pkg_type(adev);
261 
262 	switch (pkg_type) {
263 	case AMDGPU_PKG_TYPE_CEM:
264 		pkg = "cem";
265 		break;
266 	case AMDGPU_PKG_TYPE_OAM:
267 		pkg = "oam";
268 		break;
269 	default:
270 		pkg = "unknown";
271 		break;
272 	}
273 
274 	return sysfs_emit(buf, "%s : %s\n", "type", pkg);
275 }
276 
277 static DEVICE_ATTR(board_info, 0444, amdgpu_device_get_board_info, NULL);
278 
/* NULL-terminated attribute list for the board attribute group; holds only board_info. */
279 static struct attribute *amdgpu_board_attrs[] = {
280 	&dev_attr_board_info.attr,
281 	NULL,
282 };
283 
284 #ifdef notyet
amdgpu_board_attrs_is_visible(struct kobject * kobj,struct attribute * attr,int n)285 static umode_t amdgpu_board_attrs_is_visible(struct kobject *kobj,
286 					     struct attribute *attr, int n)
287 {
288 	struct device *dev = kobj_to_dev(kobj);
289 	struct drm_device *ddev = dev_get_drvdata(dev);
290 	struct amdgpu_device *adev = drm_to_adev(ddev);
291 
292 	if (adev->flags & AMD_IS_APU)
293 		return 0;
294 
295 	return attr->mode;
296 }
297 #endif
298 
/*
 * Attribute group wrapping amdgpu_board_attrs. The is_visible hook is only
 * wired up when "notyet" is defined, matching the guard around the callback.
 */
299 static const struct attribute_group amdgpu_board_attrs_group = {
300 	.attrs = amdgpu_board_attrs,
301 #ifdef notyet
302 	.is_visible = amdgpu_board_attrs_is_visible
303 #endif
304 };
305 
/* Forward declaration; the definition is not visible in this part of the file. */
306 static void amdgpu_device_get_pcie_info(struct amdgpu_device *adev);
307 
308 
309 /**
310  * amdgpu_device_supports_px - Is the device a dGPU with ATPX power control
311  *
312  * @dev: drm_device pointer
313  *
314  * Returns true if the device is a dGPU with ATPX power control,
315  * otherwise return false.
316  */
amdgpu_device_supports_px(struct drm_device * dev)317 bool amdgpu_device_supports_px(struct drm_device *dev)
318 {
319 	struct amdgpu_device *adev = drm_to_adev(dev);
320 
321 	if ((adev->flags & AMD_IS_PX) && !amdgpu_is_atpx_hybrid())
322 		return true;
323 	return false;
324 }
325 
326 /**
327  * amdgpu_device_supports_boco - Is the device a dGPU with ACPI power resources
328  *
329  * @dev: drm_device pointer
330  *
331  * Returns true if the device is a dGPU with ACPI power control,
332  * otherwise return false.
333  */
amdgpu_device_supports_boco(struct drm_device * dev)334 bool amdgpu_device_supports_boco(struct drm_device *dev)
335 {
336 	struct amdgpu_device *adev = drm_to_adev(dev);
337 
338 	if (adev->has_pr3 ||
339 	    ((adev->flags & AMD_IS_PX) && amdgpu_is_atpx_hybrid()))
340 		return true;
341 	return false;
342 }
343 
/**
 * amdgpu_device_supports_baco - Does the device support BACO
 *
 * @dev: drm_device pointer
 *
 * Thin wrapper that forwards the query to the ASIC-specific callback.
 *
 * Return:
 * 1 if the device supports BACO;
 * 3 if the device supports MACO (only works if BACO is supported);
 * otherwise 0.
 */
int amdgpu_device_supports_baco(struct drm_device *dev)
{
	struct amdgpu_device *adev;

	adev = drm_to_adev(dev);
	return amdgpu_asic_supports_baco(adev);
}
360 
amdgpu_device_detect_runtime_pm_mode(struct amdgpu_device * adev)361 void amdgpu_device_detect_runtime_pm_mode(struct amdgpu_device *adev)
362 {
363 	struct drm_device *dev;
364 	int bamaco_support;
365 
366 	dev = adev_to_drm(adev);
367 
368 	adev->pm.rpm_mode = AMDGPU_RUNPM_NONE;
369 	bamaco_support = amdgpu_device_supports_baco(dev);
370 
371 	switch (amdgpu_runtime_pm) {
372 	case 2:
373 		if (bamaco_support & MACO_SUPPORT) {
374 			adev->pm.rpm_mode = AMDGPU_RUNPM_BAMACO;
375 			dev_info(adev->dev, "Forcing BAMACO for runtime pm\n");
376 		} else if (bamaco_support == BACO_SUPPORT) {
377 			adev->pm.rpm_mode = AMDGPU_RUNPM_BACO;
378 			dev_info(adev->dev, "Requested mode BAMACO not available,fallback to use BACO\n");
379 		}
380 		break;
381 	case 1:
382 		if (bamaco_support & BACO_SUPPORT) {
383 			adev->pm.rpm_mode = AMDGPU_RUNPM_BACO;
384 			dev_info(adev->dev, "Forcing BACO for runtime pm\n");
385 		}
386 		break;
387 	case -1:
388 	case -2:
389 		if (amdgpu_device_supports_px(dev)) { /* enable PX as runtime mode */
390 			adev->pm.rpm_mode = AMDGPU_RUNPM_PX;
391 			dev_info(adev->dev, "Using ATPX for runtime pm\n");
392 		} else if (amdgpu_device_supports_boco(dev)) { /* enable boco as runtime mode */
393 			adev->pm.rpm_mode = AMDGPU_RUNPM_BOCO;
394 			dev_info(adev->dev, "Using BOCO for runtime pm\n");
395 		} else {
396 			if (!bamaco_support)
397 				goto no_runtime_pm;
398 
399 			switch (adev->asic_type) {
400 			case CHIP_VEGA20:
401 			case CHIP_ARCTURUS:
402 				/* BACO are not supported on vega20 and arctrus */
403 				break;
404 			case CHIP_VEGA10:
405 				/* enable BACO as runpm mode if noretry=0 */
406 				if (!adev->gmc.noretry)
407 					adev->pm.rpm_mode = AMDGPU_RUNPM_BACO;
408 				break;
409 			default:
410 				/* enable BACO as runpm mode on CI+ */
411 				adev->pm.rpm_mode = AMDGPU_RUNPM_BACO;
412 				break;
413 			}
414 
415 			if (adev->pm.rpm_mode == AMDGPU_RUNPM_BACO) {
416 				if (bamaco_support & MACO_SUPPORT) {
417 					adev->pm.rpm_mode = AMDGPU_RUNPM_BAMACO;
418 					dev_info(adev->dev, "Using BAMACO for runtime pm\n");
419 				} else {
420 					dev_info(adev->dev, "Using BACO for runtime pm\n");
421 				}
422 			}
423 		}
424 		break;
425 	case 0:
426 		dev_info(adev->dev, "runtime pm is manually disabled\n");
427 		break;
428 	default:
429 		break;
430 	}
431 
432 no_runtime_pm:
433 	if (adev->pm.rpm_mode == AMDGPU_RUNPM_NONE)
434 		dev_info(adev->dev, "Runtime PM not available\n");
435 }
436 /**
437  * amdgpu_device_supports_smart_shift - Is the device dGPU with
438  * smart shift support
439  *
440  * @dev: drm_device pointer
441  *
442  * Returns true if the device is a dGPU with Smart Shift support,
443  * otherwise returns false.
444  */
amdgpu_device_supports_smart_shift(struct drm_device * dev)445 bool amdgpu_device_supports_smart_shift(struct drm_device *dev)
446 {
447 	return (amdgpu_device_supports_boco(dev) &&
448 		amdgpu_acpi_is_power_shift_control_supported());
449 }
450 
451 /*
452  * VRAM access helper functions
453  */
454 
455 /**
456  * amdgpu_device_mm_access - access vram by MM_INDEX/MM_DATA
457  *
458  * @adev: amdgpu_device pointer
459  * @pos: offset of the buffer in vram
460  * @buf: virtual address of the buffer in system memory
461  * @size: read/write size, sizeof(@buf) must > @size
462  * @write: true - write to vram, otherwise - read from vram
463  */
amdgpu_device_mm_access(struct amdgpu_device * adev,loff_t pos,void * buf,size_t size,bool write)464 void amdgpu_device_mm_access(struct amdgpu_device *adev, loff_t pos,
465 			     void *buf, size_t size, bool write)
466 {
467 	unsigned long flags;
468 	uint32_t hi = ~0, tmp = 0;
469 	uint32_t *data = buf;
470 	uint64_t last;
471 	int idx;
472 
473 	if (!drm_dev_enter(adev_to_drm(adev), &idx))
474 		return;
475 
476 	BUG_ON(!IS_ALIGNED(pos, 4) || !IS_ALIGNED(size, 4));
477 
478 	spin_lock_irqsave(&adev->mmio_idx_lock, flags);
479 	for (last = pos + size; pos < last; pos += 4) {
480 		tmp = pos >> 31;
481 
482 		WREG32_NO_KIQ(mmMM_INDEX, ((uint32_t)pos) | 0x80000000);
483 		if (tmp != hi) {
484 			WREG32_NO_KIQ(mmMM_INDEX_HI, tmp);
485 			hi = tmp;
486 		}
487 		if (write)
488 			WREG32_NO_KIQ(mmMM_DATA, *data++);
489 		else
490 			*data++ = RREG32_NO_KIQ(mmMM_DATA);
491 	}
492 
493 	spin_unlock_irqrestore(&adev->mmio_idx_lock, flags);
494 	drm_dev_exit(idx);
495 }
496 
497 /**
498  * amdgpu_device_aper_access - access vram by vram aperature
499  *
500  * @adev: amdgpu_device pointer
501  * @pos: offset of the buffer in vram
502  * @buf: virtual address of the buffer in system memory
503  * @size: read/write size, sizeof(@buf) must > @size
504  * @write: true - write to vram, otherwise - read from vram
505  *
506  * The return value means how many bytes have been transferred.
507  */
amdgpu_device_aper_access(struct amdgpu_device * adev,loff_t pos,void * buf,size_t size,bool write)508 size_t amdgpu_device_aper_access(struct amdgpu_device *adev, loff_t pos,
509 				 void *buf, size_t size, bool write)
510 {
511 #ifdef CONFIG_64BIT
512 	void __iomem *addr;
513 	size_t count = 0;
514 	uint64_t last;
515 
516 	if (!adev->mman.aper_base_kaddr)
517 		return 0;
518 
519 	last = min(pos + size, adev->gmc.visible_vram_size);
520 	if (last > pos) {
521 		addr = adev->mman.aper_base_kaddr + pos;
522 		count = last - pos;
523 
524 		if (write) {
525 			memcpy_toio(addr, buf, count);
526 			/* Make sure HDP write cache flush happens without any reordering
527 			 * after the system memory contents are sent over PCIe device
528 			 */
529 			mb();
530 			amdgpu_device_flush_hdp(adev, NULL);
531 		} else {
532 			amdgpu_device_invalidate_hdp(adev, NULL);
533 			/* Make sure HDP read cache is invalidated before issuing a read
534 			 * to the PCIe device
535 			 */
536 			mb();
537 			memcpy_fromio(buf, addr, count);
538 		}
539 
540 	}
541 
542 	return count;
543 #else
544 	return 0;
545 #endif
546 }
547 
548 /**
549  * amdgpu_device_vram_access - read/write a buffer in vram
550  *
551  * @adev: amdgpu_device pointer
552  * @pos: offset of the buffer in vram
553  * @buf: virtual address of the buffer in system memory
554  * @size: read/write size, sizeof(@buf) must > @size
555  * @write: true - write to vram, otherwise - read from vram
556  */
amdgpu_device_vram_access(struct amdgpu_device * adev,loff_t pos,void * buf,size_t size,bool write)557 void amdgpu_device_vram_access(struct amdgpu_device *adev, loff_t pos,
558 			       void *buf, size_t size, bool write)
559 {
560 	size_t count;
561 
562 	/* try to using vram apreature to access vram first */
563 	count = amdgpu_device_aper_access(adev, pos, buf, size, write);
564 	size -= count;
565 	if (size) {
566 		/* using MM to access rest vram */
567 		pos += count;
568 		buf += count;
569 		amdgpu_device_mm_access(adev, pos, buf, size, write);
570 	}
571 }
572 
573 /*
574  * register access helper functions.
575  */
576 
577 /* Check if hw access should be skipped because of hotplug or device error */
amdgpu_device_skip_hw_access(struct amdgpu_device * adev)578 bool amdgpu_device_skip_hw_access(struct amdgpu_device *adev)
579 {
580 	if (adev->no_hw_access)
581 		return true;
582 
583 #ifdef CONFIG_LOCKDEP
584 	/*
585 	 * This is a bit complicated to understand, so worth a comment. What we assert
586 	 * here is that the GPU reset is not running on another thread in parallel.
587 	 *
588 	 * For this we trylock the read side of the reset semaphore, if that succeeds
589 	 * we know that the reset is not running in paralell.
590 	 *
591 	 * If the trylock fails we assert that we are either already holding the read
592 	 * side of the lock or are the reset thread itself and hold the write side of
593 	 * the lock.
594 	 */
595 	if (in_task()) {
596 		if (down_read_trylock(&adev->reset_domain->sem))
597 			up_read(&adev->reset_domain->sem);
598 		else
599 			lockdep_assert_held(&adev->reset_domain->sem);
600 	}
601 #endif
602 	return false;
603 }
604 
605 /**
606  * amdgpu_device_rreg - read a memory mapped IO or indirect register
607  *
608  * @adev: amdgpu_device pointer
609  * @reg: dword aligned register offset
610  * @acc_flags: access flags which require special behavior
611  *
612  * Returns the 32 bit value from the offset specified.
613  */
amdgpu_device_rreg(struct amdgpu_device * adev,uint32_t reg,uint32_t acc_flags)614 uint32_t amdgpu_device_rreg(struct amdgpu_device *adev,
615 			    uint32_t reg, uint32_t acc_flags)
616 {
617 	uint32_t ret;
618 
619 	if (amdgpu_device_skip_hw_access(adev))
620 		return 0;
621 
622 	if ((reg * 4) < adev->rmmio_size) {
623 		if (!(acc_flags & AMDGPU_REGS_NO_KIQ) &&
624 		    amdgpu_sriov_runtime(adev) &&
625 		    down_read_trylock(&adev->reset_domain->sem)) {
626 			ret = amdgpu_kiq_rreg(adev, reg, 0);
627 			up_read(&adev->reset_domain->sem);
628 		} else {
629 			ret = readl(((void __iomem *)adev->rmmio) + (reg * 4));
630 		}
631 	} else {
632 		ret = adev->pcie_rreg(adev, reg * 4);
633 	}
634 
635 	trace_amdgpu_device_rreg(adev->pdev->device, reg, ret);
636 
637 	return ret;
638 }
639 
640 /*
641  * Byte-wide MMIO register read helper
642  * @offset: byte offset from the start of the MMIO region
643  */
644 
645 /**
646  * amdgpu_mm_rreg8 - read a memory mapped IO register
647  *
648  * @adev: amdgpu_device pointer
649  * @offset: byte aligned register offset
650  *
651  * Returns the 8 bit value from the offset specified.
652  */
amdgpu_mm_rreg8(struct amdgpu_device * adev,uint32_t offset)653 uint8_t amdgpu_mm_rreg8(struct amdgpu_device *adev, uint32_t offset)
654 {
655 	if (amdgpu_device_skip_hw_access(adev))
656 		return 0;
657 
658 	if (offset < adev->rmmio_size)
659 		return (readb(adev->rmmio + offset));
660 	BUG();
661 }
662 
663 
664 /**
665  * amdgpu_device_xcc_rreg - read a memory mapped IO or indirect register with specific XCC
666  *
667  * @adev: amdgpu_device pointer
668  * @reg: dword aligned register offset
669  * @acc_flags: access flags which require special behavior
670  * @xcc_id: xcc accelerated compute core id
671  *
672  * Returns the 32 bit value from the offset specified.
673  */
amdgpu_device_xcc_rreg(struct amdgpu_device * adev,uint32_t reg,uint32_t acc_flags,uint32_t xcc_id)674 uint32_t amdgpu_device_xcc_rreg(struct amdgpu_device *adev,
675 				uint32_t reg, uint32_t acc_flags,
676 				uint32_t xcc_id)
677 {
678 	uint32_t ret, rlcg_flag;
679 
680 	if (amdgpu_device_skip_hw_access(adev))
681 		return 0;
682 
683 	if ((reg * 4) < adev->rmmio_size) {
684 		if (amdgpu_sriov_vf(adev) &&
685 		    !amdgpu_sriov_runtime(adev) &&
686 		    adev->gfx.rlc.rlcg_reg_access_supported &&
687 		    amdgpu_virt_get_rlcg_reg_access_flag(adev, acc_flags,
688 							 GC_HWIP, false,
689 							 &rlcg_flag)) {
690 			ret = amdgpu_virt_rlcg_reg_rw(adev, reg, 0, rlcg_flag, GET_INST(GC, xcc_id));
691 		} else if (!(acc_flags & AMDGPU_REGS_NO_KIQ) &&
692 		    amdgpu_sriov_runtime(adev) &&
693 		    down_read_trylock(&adev->reset_domain->sem)) {
694 			ret = amdgpu_kiq_rreg(adev, reg, xcc_id);
695 			up_read(&adev->reset_domain->sem);
696 		} else {
697 			ret = readl(((void __iomem *)adev->rmmio) + (reg * 4));
698 		}
699 	} else {
700 		ret = adev->pcie_rreg(adev, reg * 4);
701 	}
702 
703 	return ret;
704 }
705 
706 /*
707  * Byte-wide MMIO register write helper
708  * @offset: byte offset from the start of the MMIO region
709  * @value: the value to be written to the register
710  */
711 
712 /**
713  * amdgpu_mm_wreg8 - read a memory mapped IO register
714  *
715  * @adev: amdgpu_device pointer
716  * @offset: byte aligned register offset
717  * @value: 8 bit value to write
718  *
719  * Writes the value specified to the offset specified.
720  */
amdgpu_mm_wreg8(struct amdgpu_device * adev,uint32_t offset,uint8_t value)721 void amdgpu_mm_wreg8(struct amdgpu_device *adev, uint32_t offset, uint8_t value)
722 {
723 	if (amdgpu_device_skip_hw_access(adev))
724 		return;
725 
726 	if (offset < adev->rmmio_size)
727 		writeb(value, adev->rmmio + offset);
728 	else
729 		BUG();
730 }
731 
732 /**
733  * amdgpu_device_wreg - write to a memory mapped IO or indirect register
734  *
735  * @adev: amdgpu_device pointer
736  * @reg: dword aligned register offset
737  * @v: 32 bit value to write to the register
738  * @acc_flags: access flags which require special behavior
739  *
740  * Writes the value specified to the offset specified.
741  */
amdgpu_device_wreg(struct amdgpu_device * adev,uint32_t reg,uint32_t v,uint32_t acc_flags)742 void amdgpu_device_wreg(struct amdgpu_device *adev,
743 			uint32_t reg, uint32_t v,
744 			uint32_t acc_flags)
745 {
746 	if (amdgpu_device_skip_hw_access(adev))
747 		return;
748 
749 	if ((reg * 4) < adev->rmmio_size) {
750 		if (!(acc_flags & AMDGPU_REGS_NO_KIQ) &&
751 		    amdgpu_sriov_runtime(adev) &&
752 		    down_read_trylock(&adev->reset_domain->sem)) {
753 			amdgpu_kiq_wreg(adev, reg, v, 0);
754 			up_read(&adev->reset_domain->sem);
755 		} else {
756 			writel(v, ((void __iomem *)adev->rmmio) + (reg * 4));
757 		}
758 	} else {
759 		adev->pcie_wreg(adev, reg * 4, v);
760 	}
761 
762 	trace_amdgpu_device_wreg(adev->pdev->device, reg, v);
763 }
764 
765 /**
766  * amdgpu_mm_wreg_mmio_rlc -  write register either with direct/indirect mmio or with RLC path if in range
767  *
768  * @adev: amdgpu_device pointer
769  * @reg: mmio/rlc register
770  * @v: value to write
771  * @xcc_id: xcc accelerated compute core id
772  *
773  * this function is invoked only for the debugfs register access
774  */
amdgpu_mm_wreg_mmio_rlc(struct amdgpu_device * adev,uint32_t reg,uint32_t v,uint32_t xcc_id)775 void amdgpu_mm_wreg_mmio_rlc(struct amdgpu_device *adev,
776 			     uint32_t reg, uint32_t v,
777 			     uint32_t xcc_id)
778 {
779 	if (amdgpu_device_skip_hw_access(adev))
780 		return;
781 
782 	if (amdgpu_sriov_fullaccess(adev) &&
783 	    adev->gfx.rlc.funcs &&
784 	    adev->gfx.rlc.funcs->is_rlcg_access_range) {
785 		if (adev->gfx.rlc.funcs->is_rlcg_access_range(adev, reg))
786 			return amdgpu_sriov_wreg(adev, reg, v, 0, 0, xcc_id);
787 	} else if ((reg * 4) >= adev->rmmio_size) {
788 		adev->pcie_wreg(adev, reg * 4, v);
789 	} else {
790 		writel(v, ((void __iomem *)adev->rmmio) + (reg * 4));
791 	}
792 }
793 
794 /**
795  * amdgpu_device_xcc_wreg - write to a memory mapped IO or indirect register with specific XCC
796  *
797  * @adev: amdgpu_device pointer
798  * @reg: dword aligned register offset
799  * @v: 32 bit value to write to the register
800  * @acc_flags: access flags which require special behavior
801  * @xcc_id: xcc accelerated compute core id
802  *
803  * Writes the value specified to the offset specified.
804  */
amdgpu_device_xcc_wreg(struct amdgpu_device * adev,uint32_t reg,uint32_t v,uint32_t acc_flags,uint32_t xcc_id)805 void amdgpu_device_xcc_wreg(struct amdgpu_device *adev,
806 			uint32_t reg, uint32_t v,
807 			uint32_t acc_flags, uint32_t xcc_id)
808 {
809 	uint32_t rlcg_flag;
810 
811 	if (amdgpu_device_skip_hw_access(adev))
812 		return;
813 
814 	if ((reg * 4) < adev->rmmio_size) {
815 		if (amdgpu_sriov_vf(adev) &&
816 		    !amdgpu_sriov_runtime(adev) &&
817 		    adev->gfx.rlc.rlcg_reg_access_supported &&
818 		    amdgpu_virt_get_rlcg_reg_access_flag(adev, acc_flags,
819 							 GC_HWIP, true,
820 							 &rlcg_flag)) {
821 			amdgpu_virt_rlcg_reg_rw(adev, reg, v, rlcg_flag, GET_INST(GC, xcc_id));
822 		} else if (!(acc_flags & AMDGPU_REGS_NO_KIQ) &&
823 		    amdgpu_sriov_runtime(adev) &&
824 		    down_read_trylock(&adev->reset_domain->sem)) {
825 			amdgpu_kiq_wreg(adev, reg, v, xcc_id);
826 			up_read(&adev->reset_domain->sem);
827 		} else {
828 			writel(v, ((void __iomem *)adev->rmmio) + (reg * 4));
829 		}
830 	} else {
831 		adev->pcie_wreg(adev, reg * 4, v);
832 	}
833 }
834 
835 /**
836  * amdgpu_device_indirect_rreg - read an indirect register
837  *
838  * @adev: amdgpu_device pointer
839  * @reg_addr: indirect register address to read from
840  *
841  * Returns the value of indirect register @reg_addr
842  */
amdgpu_device_indirect_rreg(struct amdgpu_device * adev,u32 reg_addr)843 u32 amdgpu_device_indirect_rreg(struct amdgpu_device *adev,
844 				u32 reg_addr)
845 {
846 	unsigned long flags, pcie_index, pcie_data;
847 	void __iomem *pcie_index_offset;
848 	void __iomem *pcie_data_offset;
849 	u32 r;
850 
851 	pcie_index = adev->nbio.funcs->get_pcie_index_offset(adev);
852 	pcie_data = adev->nbio.funcs->get_pcie_data_offset(adev);
853 
854 	spin_lock_irqsave(&adev->pcie_idx_lock, flags);
855 	pcie_index_offset = (void __iomem *)adev->rmmio + pcie_index * 4;
856 	pcie_data_offset = (void __iomem *)adev->rmmio + pcie_data * 4;
857 
858 	writel(reg_addr, pcie_index_offset);
859 	readl(pcie_index_offset);
860 	r = readl(pcie_data_offset);
861 	spin_unlock_irqrestore(&adev->pcie_idx_lock, flags);
862 
863 	return r;
864 }
865 
amdgpu_device_indirect_rreg_ext(struct amdgpu_device * adev,u64 reg_addr)866 u32 amdgpu_device_indirect_rreg_ext(struct amdgpu_device *adev,
867 				    u64 reg_addr)
868 {
869 	unsigned long flags, pcie_index, pcie_index_hi, pcie_data;
870 	u32 r;
871 	void __iomem *pcie_index_offset;
872 	void __iomem *pcie_index_hi_offset;
873 	void __iomem *pcie_data_offset;
874 
875 	if (unlikely(!adev->nbio.funcs)) {
876 		pcie_index = AMDGPU_PCIE_INDEX_FALLBACK;
877 		pcie_data = AMDGPU_PCIE_DATA_FALLBACK;
878 	} else {
879 		pcie_index = adev->nbio.funcs->get_pcie_index_offset(adev);
880 		pcie_data = adev->nbio.funcs->get_pcie_data_offset(adev);
881 	}
882 
883 	if (reg_addr >> 32) {
884 		if (unlikely(!adev->nbio.funcs))
885 			pcie_index_hi = AMDGPU_PCIE_INDEX_HI_FALLBACK;
886 		else
887 			pcie_index_hi = adev->nbio.funcs->get_pcie_index_hi_offset(adev);
888 	} else {
889 		pcie_index_hi = 0;
890 	}
891 
892 	spin_lock_irqsave(&adev->pcie_idx_lock, flags);
893 	pcie_index_offset = (void __iomem *)adev->rmmio + pcie_index * 4;
894 	pcie_data_offset = (void __iomem *)adev->rmmio + pcie_data * 4;
895 	if (pcie_index_hi != 0)
896 		pcie_index_hi_offset = (void __iomem *)adev->rmmio +
897 				pcie_index_hi * 4;
898 
899 	writel(reg_addr, pcie_index_offset);
900 	readl(pcie_index_offset);
901 	if (pcie_index_hi != 0) {
902 		writel((reg_addr >> 32) & 0xff, pcie_index_hi_offset);
903 		readl(pcie_index_hi_offset);
904 	}
905 	r = readl(pcie_data_offset);
906 
907 	/* clear the high bits */
908 	if (pcie_index_hi != 0) {
909 		writel(0, pcie_index_hi_offset);
910 		readl(pcie_index_hi_offset);
911 	}
912 
913 	spin_unlock_irqrestore(&adev->pcie_idx_lock, flags);
914 
915 	return r;
916 }
917 
918 /**
919  * amdgpu_device_indirect_rreg64 - read a 64bits indirect register
920  *
921  * @adev: amdgpu_device pointer
922  * @reg_addr: indirect register address to read from
923  *
924  * Returns the value of indirect register @reg_addr
925  */
amdgpu_device_indirect_rreg64(struct amdgpu_device * adev,u32 reg_addr)926 u64 amdgpu_device_indirect_rreg64(struct amdgpu_device *adev,
927 				  u32 reg_addr)
928 {
929 	unsigned long flags, pcie_index, pcie_data;
930 	void __iomem *pcie_index_offset;
931 	void __iomem *pcie_data_offset;
932 	u64 r;
933 
934 	pcie_index = adev->nbio.funcs->get_pcie_index_offset(adev);
935 	pcie_data = adev->nbio.funcs->get_pcie_data_offset(adev);
936 
937 	spin_lock_irqsave(&adev->pcie_idx_lock, flags);
938 	pcie_index_offset = (void __iomem *)adev->rmmio + pcie_index * 4;
939 	pcie_data_offset = (void __iomem *)adev->rmmio + pcie_data * 4;
940 
941 	/* read low 32 bits */
942 	writel(reg_addr, pcie_index_offset);
943 	readl(pcie_index_offset);
944 	r = readl(pcie_data_offset);
945 	/* read high 32 bits */
946 	writel(reg_addr + 4, pcie_index_offset);
947 	readl(pcie_index_offset);
948 	r |= ((u64)readl(pcie_data_offset) << 32);
949 	spin_unlock_irqrestore(&adev->pcie_idx_lock, flags);
950 
951 	return r;
952 }
953 
/**
 * amdgpu_device_indirect_rreg64_ext - read a 64bits indirect register with a 64 bit address
 *
 * @adev: amdgpu_device pointer
 * @reg_addr: indirect register address to read from
 *
 * Reads the low dword at @reg_addr and the high dword at @reg_addr + 4
 * through the PCIE index/data pair under pcie_idx_lock. When @reg_addr has
 * bits set above bit 31 and the nbio layer provides
 * get_pcie_index_hi_offset(), the low 8 of those bits are programmed into
 * the index-high register before each data read and cleared at the end.
 *
 * Returns the value of indirect register @reg_addr
 */
u64 amdgpu_device_indirect_rreg64_ext(struct amdgpu_device *adev,
				  u64 reg_addr)
{
	unsigned long index_reg, data_reg;
	unsigned long index_hi_reg = 0;
	void __iomem *index_addr;
	void __iomem *index_hi_addr;
	void __iomem *data_addr;
	unsigned long irq_flags;
	u64 val;

	index_reg = adev->nbio.funcs->get_pcie_index_offset(adev);
	data_reg = adev->nbio.funcs->get_pcie_data_offset(adev);
	if ((reg_addr >> 32) && (adev->nbio.funcs->get_pcie_index_hi_offset))
		index_hi_reg = adev->nbio.funcs->get_pcie_index_hi_offset(adev);

	spin_lock_irqsave(&adev->pcie_idx_lock, irq_flags);
	index_addr = (void __iomem *)adev->rmmio + index_reg * 4;
	data_addr = (void __iomem *)adev->rmmio + data_reg * 4;
	if (index_hi_reg != 0)
		index_hi_addr = (void __iomem *)adev->rmmio +
			index_hi_reg * 4;

	/* low dword */
	writel(reg_addr, index_addr);
	readl(index_addr);
	if (index_hi_reg != 0) {
		writel((reg_addr >> 32) & 0xff, index_hi_addr);
		readl(index_hi_addr);
	}
	val = readl(data_addr);

	/* high dword */
	writel(reg_addr + 4, index_addr);
	readl(index_addr);
	if (index_hi_reg != 0) {
		writel((reg_addr >> 32) & 0xff, index_hi_addr);
		readl(index_hi_addr);
	}
	val |= ((u64)readl(data_addr) << 32);

	/* leave the index-high register cleared */
	if (index_hi_reg != 0) {
		writel(0, index_hi_addr);
		readl(index_hi_addr);
	}

	spin_unlock_irqrestore(&adev->pcie_idx_lock, irq_flags);

	return val;
}
1003 
1004 /**
1005  * amdgpu_device_indirect_wreg - write an indirect register address
1006  *
1007  * @adev: amdgpu_device pointer
1008  * @reg_addr: indirect register offset
1009  * @reg_data: indirect register data
1010  *
1011  */
amdgpu_device_indirect_wreg(struct amdgpu_device * adev,u32 reg_addr,u32 reg_data)1012 void amdgpu_device_indirect_wreg(struct amdgpu_device *adev,
1013 				 u32 reg_addr, u32 reg_data)
1014 {
1015 	unsigned long flags, pcie_index, pcie_data;
1016 	void __iomem *pcie_index_offset;
1017 	void __iomem *pcie_data_offset;
1018 
1019 	pcie_index = adev->nbio.funcs->get_pcie_index_offset(adev);
1020 	pcie_data = adev->nbio.funcs->get_pcie_data_offset(adev);
1021 
1022 	spin_lock_irqsave(&adev->pcie_idx_lock, flags);
1023 	pcie_index_offset = (void __iomem *)adev->rmmio + pcie_index * 4;
1024 	pcie_data_offset = (void __iomem *)adev->rmmio + pcie_data * 4;
1025 
1026 	writel(reg_addr, pcie_index_offset);
1027 	readl(pcie_index_offset);
1028 	writel(reg_data, pcie_data_offset);
1029 	readl(pcie_data_offset);
1030 	spin_unlock_irqrestore(&adev->pcie_idx_lock, flags);
1031 }
1032 
/**
 * amdgpu_device_indirect_wreg_ext - write a 32bits indirect register whose
 * address may be wider than 32 bits
 *
 * @adev: amdgpu_device pointer
 * @reg_addr: indirect register address; bits 39:32 go through the index-hi
 *            register when the address needs them and the NBIO block
 *            provides that register
 * @reg_data: indirect register data
 */
void amdgpu_device_indirect_wreg_ext(struct amdgpu_device *adev,
				     u64 reg_addr, u32 reg_data)
{
	unsigned long irq_flags, index_dw, data_dw;
	unsigned long index_hi_dw = 0;
	void __iomem *index_reg;
	void __iomem *index_hi_reg;
	void __iomem *data_reg;

	index_dw = adev->nbio.funcs->get_pcie_index_offset(adev);
	data_dw = adev->nbio.funcs->get_pcie_data_offset(adev);
	/* index_hi_dw == 0 means "no hi index register in use" */
	if ((reg_addr >> 32) && (adev->nbio.funcs->get_pcie_index_hi_offset))
		index_hi_dw = adev->nbio.funcs->get_pcie_index_hi_offset(adev);

	spin_lock_irqsave(&adev->pcie_idx_lock, irq_flags);
	index_reg = (void __iomem *)adev->rmmio + index_dw * 4;
	data_reg = (void __iomem *)adev->rmmio + data_dw * 4;
	if (index_hi_dw != 0)
		index_hi_reg = (void __iomem *)adev->rmmio +
				index_hi_dw * 4;

	/* program the (low, optional high) index, then the data */
	writel(reg_addr, index_reg);
	readl(index_reg);
	if (index_hi_dw != 0) {
		writel((reg_addr >> 32) & 0xff, index_hi_reg);
		readl(index_hi_reg);
	}
	writel(reg_data, data_reg);
	readl(data_reg);

	/* leave the hi index register zeroed for the next user */
	if (index_hi_dw != 0) {
		writel(0, index_hi_reg);
		readl(index_hi_reg);
	}

	spin_unlock_irqrestore(&adev->pcie_idx_lock, irq_flags);
}
1072 
1073 /**
1074  * amdgpu_device_indirect_wreg64 - write a 64bits indirect register address
1075  *
1076  * @adev: amdgpu_device pointer
1077  * @reg_addr: indirect register offset
1078  * @reg_data: indirect register data
1079  *
1080  */
amdgpu_device_indirect_wreg64(struct amdgpu_device * adev,u32 reg_addr,u64 reg_data)1081 void amdgpu_device_indirect_wreg64(struct amdgpu_device *adev,
1082 				   u32 reg_addr, u64 reg_data)
1083 {
1084 	unsigned long flags, pcie_index, pcie_data;
1085 	void __iomem *pcie_index_offset;
1086 	void __iomem *pcie_data_offset;
1087 
1088 	pcie_index = adev->nbio.funcs->get_pcie_index_offset(adev);
1089 	pcie_data = adev->nbio.funcs->get_pcie_data_offset(adev);
1090 
1091 	spin_lock_irqsave(&adev->pcie_idx_lock, flags);
1092 	pcie_index_offset = (void __iomem *)adev->rmmio + pcie_index * 4;
1093 	pcie_data_offset = (void __iomem *)adev->rmmio + pcie_data * 4;
1094 
1095 	/* write low 32 bits */
1096 	writel(reg_addr, pcie_index_offset);
1097 	readl(pcie_index_offset);
1098 	writel((u32)(reg_data & 0xffffffffULL), pcie_data_offset);
1099 	readl(pcie_data_offset);
1100 	/* write high 32 bits */
1101 	writel(reg_addr + 4, pcie_index_offset);
1102 	readl(pcie_index_offset);
1103 	writel((u32)(reg_data >> 32), pcie_data_offset);
1104 	readl(pcie_data_offset);
1105 	spin_unlock_irqrestore(&adev->pcie_idx_lock, flags);
1106 }
1107 
/**
 * amdgpu_device_indirect_wreg64_ext - write a 64bits indirect register
 * whose address may be wider than 32 bits
 *
 * @adev: amdgpu_device pointer
 * @reg_addr: indirect register address of the low dword; bits 39:32 go
 *            through the index-hi register when needed and available
 * @reg_data: 64 bit value to write
 */
void amdgpu_device_indirect_wreg64_ext(struct amdgpu_device *adev,
				   u64 reg_addr, u64 reg_data)
{
	unsigned long irq_flags, index_dw, data_dw;
	unsigned long index_hi_dw = 0;
	void __iomem *index_reg;
	void __iomem *index_hi_reg;
	void __iomem *data_reg;
	const u32 lo = (u32)(reg_data & 0xffffffffULL);
	const u32 hi = (u32)(reg_data >> 32);

	index_dw = adev->nbio.funcs->get_pcie_index_offset(adev);
	data_dw = adev->nbio.funcs->get_pcie_data_offset(adev);
	/* index_hi_dw == 0 means "no hi index register in use" */
	if ((reg_addr >> 32) && (adev->nbio.funcs->get_pcie_index_hi_offset))
		index_hi_dw = adev->nbio.funcs->get_pcie_index_hi_offset(adev);

	spin_lock_irqsave(&adev->pcie_idx_lock, irq_flags);
	index_reg = (void __iomem *)adev->rmmio + index_dw * 4;
	data_reg = (void __iomem *)adev->rmmio + data_dw * 4;
	if (index_hi_dw != 0)
		index_hi_reg = (void __iomem *)adev->rmmio +
				index_hi_dw * 4;

	/* low dword */
	writel(reg_addr, index_reg);
	readl(index_reg);
	if (index_hi_dw != 0) {
		writel((reg_addr >> 32) & 0xff, index_hi_reg);
		readl(index_hi_reg);
	}
	writel(lo, data_reg);
	readl(data_reg);

	/* high dword, the hi index bits are programmed again */
	writel(reg_addr + 4, index_reg);
	readl(index_reg);
	if (index_hi_dw != 0) {
		writel((reg_addr >> 32) & 0xff, index_hi_reg);
		readl(index_hi_reg);
	}
	writel(hi, data_reg);
	readl(data_reg);

	/* leave the hi index register zeroed for the next user */
	if (index_hi_dw != 0) {
		writel(0, index_hi_reg);
		readl(index_hi_reg);
	}

	spin_unlock_irqrestore(&adev->pcie_idx_lock, irq_flags);
}
1156 
1157 /**
1158  * amdgpu_device_get_rev_id - query device rev_id
1159  *
1160  * @adev: amdgpu_device pointer
1161  *
1162  * Return device rev_id
1163  */
amdgpu_device_get_rev_id(struct amdgpu_device * adev)1164 u32 amdgpu_device_get_rev_id(struct amdgpu_device *adev)
1165 {
1166 	return adev->nbio.funcs->get_rev_id(adev);
1167 }
1168 
/**
 * amdgpu_invalid_rreg - dummy reg read function
 *
 * @adev: amdgpu_device pointer
 * @reg: offset of register
 *
 * Dummy register read function.  Used for register blocks
 * that certain asics don't have (all asics).
 * No register is read: the offending offset is logged and BUG() is hit.
 * The trailing "return 0" is only reached if BUG() returns.
 */
static uint32_t amdgpu_invalid_rreg(struct amdgpu_device *adev, uint32_t reg)
{
	DRM_ERROR("Invalid callback to read register 0x%04X\n", reg);
	BUG();
	return 0;
}
1185 
/**
 * amdgpu_invalid_rreg_ext - dummy reg read function taking a 64 bit offset
 *
 * @adev: amdgpu_device pointer
 * @reg: offset of register
 *
 * Same as amdgpu_invalid_rreg() but for the callback variant whose
 * register offset is 64 bits wide.  Logs the offset and hits BUG();
 * "return 0" is only reached if BUG() returns.
 */
static uint32_t amdgpu_invalid_rreg_ext(struct amdgpu_device *adev, uint64_t reg)
{
	DRM_ERROR("Invalid callback to read register 0x%llX\n", reg);
	BUG();
	return 0;
}
1192 
/**
 * amdgpu_invalid_wreg - dummy reg write function
 *
 * @adev: amdgpu_device pointer
 * @reg: offset of register
 * @v: value to write to the register
 *
 * Dummy register write function.  Used for register blocks
 * that certain asics don't have (all asics).
 * Nothing is written: the offset and value are logged and BUG() is hit.
 */
static void amdgpu_invalid_wreg(struct amdgpu_device *adev, uint32_t reg, uint32_t v)
{
	DRM_ERROR("Invalid callback to write register 0x%04X with 0x%08X\n",
		  reg, v);
	BUG();
}
1209 
/**
 * amdgpu_invalid_wreg_ext - dummy reg write function taking a 64 bit offset
 *
 * @adev: amdgpu_device pointer
 * @reg: offset of register
 * @v: value to write to the register
 *
 * Same as amdgpu_invalid_wreg() but for the callback variant whose
 * register offset is 64 bits wide.  Logs the request and hits BUG().
 */
static void amdgpu_invalid_wreg_ext(struct amdgpu_device *adev, uint64_t reg, uint32_t v)
{
	DRM_ERROR("Invalid callback to write register 0x%llX with 0x%08X\n",
		  reg, v);
	BUG();
}
1216 
/**
 * amdgpu_invalid_rreg64 - dummy 64 bit reg read function
 *
 * @adev: amdgpu_device pointer
 * @reg: offset of register
 *
 * Dummy register read function.  Used for register blocks
 * that certain asics don't have (all asics).
 * No register is read: the offending offset is logged and BUG() is hit.
 * The trailing "return 0" is only reached if BUG() returns.
 */
static uint64_t amdgpu_invalid_rreg64(struct amdgpu_device *adev, uint32_t reg)
{
	DRM_ERROR("Invalid callback to read 64 bit register 0x%04X\n", reg);
	BUG();
	return 0;
}
1233 
/**
 * amdgpu_invalid_rreg64_ext - dummy 64 bit reg read function taking a
 * 64 bit offset
 *
 * @adev: amdgpu_device pointer
 * @reg: offset of register
 *
 * Same as amdgpu_invalid_rreg64() but for the callback variant whose
 * register offset is 64 bits wide.  Logs the offset and hits BUG();
 * "return 0" is only reached if BUG() returns.
 */
static uint64_t amdgpu_invalid_rreg64_ext(struct amdgpu_device *adev, uint64_t reg)
{
	/* say "64 bit" so the log tells this apart from amdgpu_invalid_rreg_ext() */
	DRM_ERROR("Invalid callback to read 64 bit register 0x%llX\n", reg);
	BUG();
	return 0;
}
1240 
/**
 * amdgpu_invalid_wreg64 - dummy 64 bit reg write function
 *
 * @adev: amdgpu_device pointer
 * @reg: offset of register
 * @v: value to write to the register
 *
 * Dummy register write function.  Used for register blocks
 * that certain asics don't have (all asics).
 * Nothing is written: the offset and value are logged and BUG() is hit.
 */
static void amdgpu_invalid_wreg64(struct amdgpu_device *adev, uint32_t reg, uint64_t v)
{
	DRM_ERROR("Invalid callback to write 64 bit register 0x%04X with 0x%08llX\n",
		  reg, v);
	BUG();
}
1257 
/**
 * amdgpu_invalid_wreg64_ext - dummy 64 bit reg write function taking a
 * 64 bit offset
 *
 * @adev: amdgpu_device pointer
 * @reg: offset of register
 * @v: value to write to the register
 *
 * Same as amdgpu_invalid_wreg64() but for the callback variant whose
 * register offset is 64 bits wide.  Logs the request and hits BUG().
 */
static void amdgpu_invalid_wreg64_ext(struct amdgpu_device *adev, uint64_t reg, uint64_t v)
{
	DRM_ERROR("Invalid callback to write 64 bit register 0x%llX with 0x%08llX\n",
		  reg, v);
	BUG();
}
1264 
/**
 * amdgpu_block_invalid_rreg - dummy reg read function
 *
 * @adev: amdgpu_device pointer
 * @block: offset of instance
 * @reg: offset of register
 *
 * Dummy register read function.  Used for register blocks
 * that certain asics don't have (all asics).
 * No register is read: the register and block offsets are logged and
 * BUG() is hit.  The trailing "return 0" is only reached if BUG() returns.
 */
static uint32_t amdgpu_block_invalid_rreg(struct amdgpu_device *adev,
					  uint32_t block, uint32_t reg)
{
	DRM_ERROR("Invalid callback to read register 0x%04X in block 0x%04X\n",
		  reg, block);
	BUG();
	return 0;
}
1284 
/**
 * amdgpu_block_invalid_wreg - dummy reg write function
 *
 * @adev: amdgpu_device pointer
 * @block: offset of instance
 * @reg: offset of register
 * @v: value to write to the register
 *
 * Dummy register write function.  Used for register blocks
 * that certain asics don't have (all asics).
 * Nothing is written: the register, block and value are logged and
 * BUG() is hit.
 */
static void amdgpu_block_invalid_wreg(struct amdgpu_device *adev,
				      uint32_t block,
				      uint32_t reg, uint32_t v)
{
	DRM_ERROR("Invalid block callback to write register 0x%04X in block 0x%04X with 0x%08X\n",
		  reg, block, v);
	BUG();
}
1304 
1305 /**
1306  * amdgpu_device_asic_init - Wrapper for atom asic_init
1307  *
1308  * @adev: amdgpu_device pointer
1309  *
1310  * Does any asic specific work and then calls atom asic init.
1311  */
amdgpu_device_asic_init(struct amdgpu_device * adev)1312 static int amdgpu_device_asic_init(struct amdgpu_device *adev)
1313 {
1314 	int ret;
1315 
1316 	amdgpu_asic_pre_asic_init(adev);
1317 
1318 	if (amdgpu_ip_version(adev, GC_HWIP, 0) == IP_VERSION(9, 4, 3) ||
1319 	    amdgpu_ip_version(adev, GC_HWIP, 0) == IP_VERSION(9, 4, 4) ||
1320 	    amdgpu_ip_version(adev, GC_HWIP, 0) >= IP_VERSION(11, 0, 0)) {
1321 		amdgpu_psp_wait_for_bootloader(adev);
1322 		ret = amdgpu_atomfirmware_asic_init(adev, true);
1323 		return ret;
1324 	} else {
1325 		return amdgpu_atom_asic_init(adev->mode_info.atom_context);
1326 	}
1327 
1328 	return 0;
1329 }
1330 
1331 /**
1332  * amdgpu_device_mem_scratch_init - allocate the VRAM scratch page
1333  *
1334  * @adev: amdgpu_device pointer
1335  *
1336  * Allocates a scratch page of VRAM for use by various things in the
1337  * driver.
1338  */
amdgpu_device_mem_scratch_init(struct amdgpu_device * adev)1339 static int amdgpu_device_mem_scratch_init(struct amdgpu_device *adev)
1340 {
1341 	return amdgpu_bo_create_kernel(adev, AMDGPU_GPU_PAGE_SIZE, PAGE_SIZE,
1342 				       AMDGPU_GEM_DOMAIN_VRAM |
1343 				       AMDGPU_GEM_DOMAIN_GTT,
1344 				       &adev->mem_scratch.robj,
1345 				       &adev->mem_scratch.gpu_addr,
1346 				       (void **)&adev->mem_scratch.ptr);
1347 }
1348 
/**
 * amdgpu_device_mem_scratch_fini - Free the VRAM scratch page
 *
 * @adev: amdgpu_device pointer
 *
 * Frees the scratch BO created by amdgpu_device_mem_scratch_init().
 */
static void amdgpu_device_mem_scratch_fini(struct amdgpu_device *adev)
{
	/*
	 * Only the BO pointer is handed over; mem_scratch.gpu_addr and
	 * mem_scratch.ptr are not passed, so this call cannot reset them.
	 */
	amdgpu_bo_free_kernel(&adev->mem_scratch.robj, NULL, NULL);
}
1360 
1361 /**
1362  * amdgpu_device_program_register_sequence - program an array of registers.
1363  *
1364  * @adev: amdgpu_device pointer
1365  * @registers: pointer to the register array
1366  * @array_size: size of the register array
1367  *
1368  * Programs an array or registers with and or masks.
1369  * This is a helper for setting golden registers.
1370  */
amdgpu_device_program_register_sequence(struct amdgpu_device * adev,const u32 * registers,const u32 array_size)1371 void amdgpu_device_program_register_sequence(struct amdgpu_device *adev,
1372 					     const u32 *registers,
1373 					     const u32 array_size)
1374 {
1375 	u32 tmp, reg, and_mask, or_mask;
1376 	int i;
1377 
1378 	if (array_size % 3)
1379 		return;
1380 
1381 	for (i = 0; i < array_size; i += 3) {
1382 		reg = registers[i + 0];
1383 		and_mask = registers[i + 1];
1384 		or_mask = registers[i + 2];
1385 
1386 		if (and_mask == 0xffffffff) {
1387 			tmp = or_mask;
1388 		} else {
1389 			tmp = RREG32(reg);
1390 			tmp &= ~and_mask;
1391 			if (adev->family >= AMDGPU_FAMILY_AI)
1392 				tmp |= (or_mask & and_mask);
1393 			else
1394 				tmp |= or_mask;
1395 		}
1396 		WREG32(reg, tmp);
1397 	}
1398 }
1399 
/**
 * amdgpu_device_pci_config_reset - reset the GPU
 *
 * @adev: amdgpu_device pointer
 *
 * Resets the GPU using the pci config reset sequence.
 * Only applicable to asics prior to vega10.
 */
void amdgpu_device_pci_config_reset(struct amdgpu_device *adev)
{
	/* the reset is a single write of the reset value to config offset 0x7c */
	pci_write_config_dword(adev->pdev, 0x7c, AMDGPU_ASIC_RESET_DATA);
}
1412 
/**
 * amdgpu_device_pci_reset - reset the GPU using generic PCI means
 *
 * @adev: amdgpu_device pointer
 *
 * Intended to reset the GPU using generic pci reset interfaces
 * (FLR, SBR, etc.).  As written this is a stub: it calls STUB() and
 * always returns -ENOSYS.
 */
int amdgpu_device_pci_reset(struct amdgpu_device *adev)
{
	STUB();
	return -ENOSYS;
	/*
	 * The real implementation sits after the return above and is only
	 * compiled when "notyet" is defined, so it is never executed.
	 */
#ifdef notyet
	return pci_reset_function(adev->pdev);
#endif
}
1428 
1429 /*
1430  * amdgpu_device_wb_*()
1431  * Writeback is the method by which the GPU updates special pages in memory
 * with the status of certain GPU events (fences, ring pointers, etc.).
1433  */
1434 
1435 /**
1436  * amdgpu_device_wb_fini - Disable Writeback and free memory
1437  *
1438  * @adev: amdgpu_device pointer
1439  *
1440  * Disables Writeback and frees the Writeback memory (all asics).
1441  * Used at driver shutdown.
1442  */
amdgpu_device_wb_fini(struct amdgpu_device * adev)1443 static void amdgpu_device_wb_fini(struct amdgpu_device *adev)
1444 {
1445 	if (adev->wb.wb_obj) {
1446 		amdgpu_bo_free_kernel(&adev->wb.wb_obj,
1447 				      &adev->wb.gpu_addr,
1448 				      (void **)&adev->wb.wb);
1449 		adev->wb.wb_obj = NULL;
1450 	}
1451 }
1452 
1453 /**
1454  * amdgpu_device_wb_init - Init Writeback driver info and allocate memory
1455  *
1456  * @adev: amdgpu_device pointer
1457  *
1458  * Initializes writeback and allocates writeback memory (all asics).
1459  * Used at driver startup.
1460  * Returns 0 on success or an -error on failure.
1461  */
amdgpu_device_wb_init(struct amdgpu_device * adev)1462 static int amdgpu_device_wb_init(struct amdgpu_device *adev)
1463 {
1464 	int r;
1465 
1466 	if (adev->wb.wb_obj == NULL) {
1467 		/* AMDGPU_MAX_WB * sizeof(uint32_t) * 8 = AMDGPU_MAX_WB 256bit slots */
1468 		r = amdgpu_bo_create_kernel(adev, AMDGPU_MAX_WB * sizeof(uint32_t) * 8,
1469 					    PAGE_SIZE, AMDGPU_GEM_DOMAIN_GTT,
1470 					    &adev->wb.wb_obj, &adev->wb.gpu_addr,
1471 					    (void **)&adev->wb.wb);
1472 		if (r) {
1473 			dev_warn(adev->dev, "(%d) create WB bo failed\n", r);
1474 			return r;
1475 		}
1476 
1477 		adev->wb.num_wb = AMDGPU_MAX_WB;
1478 		memset(&adev->wb.used, 0, sizeof(adev->wb.used));
1479 
1480 		/* clear wb memory */
1481 		memset((char *)adev->wb.wb, 0, AMDGPU_MAX_WB * sizeof(uint32_t) * 8);
1482 	}
1483 
1484 	return 0;
1485 }
1486 
1487 /**
1488  * amdgpu_device_wb_get - Allocate a wb entry
1489  *
1490  * @adev: amdgpu_device pointer
1491  * @wb: wb index
1492  *
1493  * Allocate a wb slot for use by the driver (all asics).
1494  * Returns 0 on success or -EINVAL on failure.
1495  */
amdgpu_device_wb_get(struct amdgpu_device * adev,u32 * wb)1496 int amdgpu_device_wb_get(struct amdgpu_device *adev, u32 *wb)
1497 {
1498 	unsigned long flags, offset;
1499 
1500 	spin_lock_irqsave(&adev->wb.lock, flags);
1501 	offset = find_first_zero_bit(adev->wb.used, adev->wb.num_wb);
1502 	if (offset < adev->wb.num_wb) {
1503 		__set_bit(offset, adev->wb.used);
1504 		spin_unlock_irqrestore(&adev->wb.lock, flags);
1505 		*wb = offset << 3; /* convert to dw offset */
1506 		return 0;
1507 	} else {
1508 		spin_unlock_irqrestore(&adev->wb.lock, flags);
1509 		return -EINVAL;
1510 	}
1511 }
1512 
1513 /**
1514  * amdgpu_device_wb_free - Free a wb entry
1515  *
1516  * @adev: amdgpu_device pointer
1517  * @wb: wb index
1518  *
1519  * Free a wb slot allocated for use by the driver (all asics)
1520  */
amdgpu_device_wb_free(struct amdgpu_device * adev,u32 wb)1521 void amdgpu_device_wb_free(struct amdgpu_device *adev, u32 wb)
1522 {
1523 	unsigned long flags;
1524 
1525 	wb >>= 3;
1526 	spin_lock_irqsave(&adev->wb.lock, flags);
1527 	if (wb < adev->wb.num_wb)
1528 		__clear_bit(wb, adev->wb.used);
1529 	spin_unlock_irqrestore(&adev->wb.lock, flags);
1530 }
1531 
1532 /**
1533  * amdgpu_device_resize_fb_bar - try to resize FB BAR
1534  *
1535  * @adev: amdgpu_device pointer
1536  *
1537  * Try to resize FB BAR to make all VRAM CPU accessible. We try very hard not
1538  * to fail, but if any of the BARs is not accessible after the size we abort
1539  * driver loading by returning -ENODEV.
1540  */
amdgpu_device_resize_fb_bar(struct amdgpu_device * adev)1541 int amdgpu_device_resize_fb_bar(struct amdgpu_device *adev)
1542 {
1543 #ifdef __linux__
1544 	int rbar_size = pci_rebar_bytes_to_size(adev->gmc.real_vram_size);
1545 	struct pci_bus *root;
1546 	struct resource *res;
1547 	unsigned int i;
1548 	u16 cmd;
1549 	int r;
1550 
1551 	if (!IS_ENABLED(CONFIG_PHYS_ADDR_T_64BIT))
1552 		return 0;
1553 
1554 	/* Bypass for VF */
1555 	if (amdgpu_sriov_vf(adev))
1556 		return 0;
1557 
1558 	/* PCI_EXT_CAP_ID_VNDR extended capability is located at 0x100 */
1559 	if (!pci_find_ext_capability(adev->pdev, PCI_EXT_CAP_ID_VNDR))
1560 		DRM_WARN("System can't access extended configuration space, please check!!\n");
1561 
1562 	/* skip if the bios has already enabled large BAR */
1563 	if (adev->gmc.real_vram_size &&
1564 	    (pci_resource_len(adev->pdev, 0) >= adev->gmc.real_vram_size))
1565 		return 0;
1566 
1567 	/* Check if the root BUS has 64bit memory resources */
1568 	root = adev->pdev->bus;
1569 	while (root->parent)
1570 		root = root->parent;
1571 
1572 	pci_bus_for_each_resource(root, res, i) {
1573 		if (res && res->flags & (IORESOURCE_MEM | IORESOURCE_MEM_64) &&
1574 		    res->start > 0x100000000ull)
1575 			break;
1576 	}
1577 
1578 	/* Trying to resize is pointless without a root hub window above 4GB */
1579 	if (!res)
1580 		return 0;
1581 
1582 	/* Limit the BAR size to what is available */
1583 	rbar_size = min(fls(pci_rebar_get_possible_sizes(adev->pdev, 0)) - 1,
1584 			rbar_size);
1585 
1586 	/* Disable memory decoding while we change the BAR addresses and size */
1587 	pci_read_config_word(adev->pdev, PCI_COMMAND, &cmd);
1588 	pci_write_config_word(adev->pdev, PCI_COMMAND,
1589 			      cmd & ~PCI_COMMAND_MEMORY);
1590 
1591 	/* Free the VRAM and doorbell BAR, we most likely need to move both. */
1592 	amdgpu_doorbell_fini(adev);
1593 	if (adev->asic_type >= CHIP_BONAIRE)
1594 		pci_release_resource(adev->pdev, 2);
1595 
1596 	pci_release_resource(adev->pdev, 0);
1597 
1598 	r = pci_resize_resource(adev->pdev, 0, rbar_size);
1599 	if (r == -ENOSPC)
1600 		DRM_INFO("Not enough PCI address space for a large BAR.");
1601 	else if (r && r != -ENOTSUPP)
1602 		DRM_ERROR("Problem resizing BAR0 (%d).", r);
1603 
1604 	pci_assign_unassigned_bus_resources(adev->pdev->bus);
1605 
1606 	/* When the doorbell or fb BAR isn't available we have no chance of
1607 	 * using the device.
1608 	 */
1609 	r = amdgpu_doorbell_init(adev);
1610 	if (r || (pci_resource_flags(adev->pdev, 0) & IORESOURCE_UNSET))
1611 		return -ENODEV;
1612 
1613 	pci_write_config_word(adev->pdev, PCI_COMMAND, cmd);
1614 #endif /* __linux__ */
1615 
1616 	return 0;
1617 }
1618 
amdgpu_device_read_bios(struct amdgpu_device * adev)1619 static bool amdgpu_device_read_bios(struct amdgpu_device *adev)
1620 {
1621 	if (hweight32(adev->aid_mask) && (adev->flags & AMD_IS_APU))
1622 		return false;
1623 
1624 	return true;
1625 }
1626 
1627 /*
1628  * GPU helpers function.
1629  */
1630 /**
1631  * amdgpu_device_need_post - check if the hw need post or not
1632  *
1633  * @adev: amdgpu_device pointer
1634  *
1635  * Check if the asic has been initialized (all asics) at driver startup
1636  * or post is needed if  hw reset is performed.
1637  * Returns true if need or false if not.
1638  */
amdgpu_device_need_post(struct amdgpu_device * adev)1639 bool amdgpu_device_need_post(struct amdgpu_device *adev)
1640 {
1641 	uint32_t reg;
1642 
1643 	if (amdgpu_sriov_vf(adev))
1644 		return false;
1645 
1646 	if (!amdgpu_device_read_bios(adev))
1647 		return false;
1648 
1649 	if (amdgpu_passthrough(adev)) {
1650 		/* for FIJI: In whole GPU pass-through virtualization case, after VM reboot
1651 		 * some old smc fw still need driver do vPost otherwise gpu hang, while
1652 		 * those smc fw version above 22.15 doesn't have this flaw, so we force
1653 		 * vpost executed for smc version below 22.15
1654 		 */
1655 		if (adev->asic_type == CHIP_FIJI) {
1656 			int err;
1657 			uint32_t fw_ver;
1658 
1659 			err = request_firmware(&adev->pm.fw, "amdgpu/fiji_smc.bin", adev->dev);
1660 			/* force vPost if error occured */
1661 			if (err)
1662 				return true;
1663 
1664 			fw_ver = *((uint32_t *)adev->pm.fw->data + 69);
1665 			release_firmware(adev->pm.fw);
1666 			if (fw_ver < 0x00160e00)
1667 				return true;
1668 		}
1669 	}
1670 
1671 	/* Don't post if we need to reset whole hive on init */
1672 	if (adev->gmc.xgmi.pending_reset)
1673 		return false;
1674 
1675 	if (adev->has_hw_reset) {
1676 		adev->has_hw_reset = false;
1677 		return true;
1678 	}
1679 
1680 	/* bios scratch used on CIK+ */
1681 	if (adev->asic_type >= CHIP_BONAIRE)
1682 		return amdgpu_atombios_scratch_need_asic_init(adev);
1683 
1684 	/* check MEM_SIZE for older asics */
1685 	reg = amdgpu_asic_get_config_memsize(adev);
1686 
1687 	if ((reg != 0) && (reg != 0xffffffff))
1688 		return false;
1689 
1690 	return true;
1691 }
1692 
1693 /*
1694  * Check whether seamless boot is supported.
1695  *
1696  * So far we only support seamless boot on DCE 3.0 or later.
1697  * If users report that it works on older ASICS as well, we may
1698  * loosen this.
1699  */
amdgpu_device_seamless_boot_supported(struct amdgpu_device * adev)1700 bool amdgpu_device_seamless_boot_supported(struct amdgpu_device *adev)
1701 {
1702 	switch (amdgpu_seamless) {
1703 	case -1:
1704 		break;
1705 	case 1:
1706 		return true;
1707 	case 0:
1708 		return false;
1709 	default:
1710 		DRM_ERROR("Invalid value for amdgpu.seamless: %d\n",
1711 			  amdgpu_seamless);
1712 		return false;
1713 	}
1714 
1715 	if (!(adev->flags & AMD_IS_APU))
1716 		return false;
1717 
1718 	if (adev->mman.keep_stolen_vga_memory)
1719 		return false;
1720 
1721 	return amdgpu_ip_version(adev, DCE_HWIP, 0) >= IP_VERSION(3, 0, 0);
1722 }
1723 
/*
 * Intel hosts such as Rocket Lake, Alder Lake, Raptor Lake and Sapphire Rapids
 * don't support dynamic speed switching. Until we have confirmation from Intel
 * that a specific host supports it, it's safer that we keep it disabled for all.
 *
 * https://edc.intel.com/content/www/us/en/design/products/platforms/details/raptor-lake-s/13th-generation-core-processors-datasheet-volume-1-of-2/005/pci-express-support/
 * https://gitlab.freedesktop.org/drm/amd/-/issues/2663
 *
 * Returns false on x86 builds when the CPU vendor is Intel, true otherwise.
 * The __linux__ variant additionally returns true for removable devices
 * before looking at the CPU vendor.
 */
static bool amdgpu_device_pcie_dynamic_switching_supported(struct amdgpu_device *adev)
{
#if IS_ENABLED(CONFIG_X86)
#ifdef __linux__
	struct cpuinfo_x86 *c = &cpu_data(0);

	/* eGPU change speeds based on USB4 fabric conditions */
	if (dev_is_removable(adev->dev))
		return true;

	if (c->x86_vendor == X86_VENDOR_INTEL)
#else
	/* non-Linux build: identify Intel via the cpu_vendor string instead */
	if (strcmp(cpu_vendor, "GenuineIntel") == 0)
#endif
		/* shared body of whichever vendor check was compiled in above */
		return false;
#endif
	return true;
}
1750 
1751 /**
1752  * amdgpu_device_should_use_aspm - check if the device should program ASPM
1753  *
1754  * @adev: amdgpu_device pointer
1755  *
1756  * Confirm whether the module parameter and pcie bridge agree that ASPM should
1757  * be set for this device.
1758  *
1759  * Returns true if it should be used or false if not.
1760  */
amdgpu_device_should_use_aspm(struct amdgpu_device * adev)1761 bool amdgpu_device_should_use_aspm(struct amdgpu_device *adev)
1762 {
1763 	switch (amdgpu_aspm) {
1764 	case -1:
1765 		break;
1766 	case 0:
1767 		return false;
1768 	case 1:
1769 		return true;
1770 	default:
1771 		return false;
1772 	}
1773 	if (adev->flags & AMD_IS_APU)
1774 		return false;
1775 	if (!(adev->pm.pp_feature & PP_PCIE_DPM_MASK))
1776 		return false;
1777 	return pcie_aspm_enabled(adev->pdev);
1778 }
1779 
/* if we get transitioned to only one device, take VGA back */
/**
 * amdgpu_device_vga_set_decode - enable/disable vga decode
 *
 * @pdev: PCI device pointer
 * @state: enable/disable vga decode
 *
 * Forwards @state to the asic's set_vga_state hook (all asics).
 * Returns the VGA resource flags: the normal IO/MEM resources always,
 * plus the legacy IO/MEM resources when @state is true.
 */
#ifdef notyet
static unsigned int amdgpu_device_vga_set_decode(struct pci_dev *pdev,
		bool state)
{
	struct amdgpu_device *adev = drm_to_adev(pci_get_drvdata(pdev));
	unsigned int rsrc = VGA_RSRC_NORMAL_IO | VGA_RSRC_NORMAL_MEM;

	amdgpu_asic_set_vga_state(adev, state);
	if (state)
		rsrc |= VGA_RSRC_LEGACY_IO | VGA_RSRC_LEGACY_MEM;

	return rsrc;
}
#endif
1804 
1805 /**
1806  * amdgpu_device_check_block_size - validate the vm block size
1807  *
1808  * @adev: amdgpu_device pointer
1809  *
1810  * Validates the vm block size specified via module parameter.
1811  * The vm block size defines number of bits in page table versus page directory,
1812  * a page is 4KB so we have 12 bits offset, minimum 9 bits in the
1813  * page table and the remaining bits are in the page directory.
1814  */
amdgpu_device_check_block_size(struct amdgpu_device * adev)1815 static void amdgpu_device_check_block_size(struct amdgpu_device *adev)
1816 {
1817 	/* defines number of bits in page table versus page directory,
1818 	 * a page is 4KB so we have 12 bits offset, minimum 9 bits in the
1819 	 * page table and the remaining bits are in the page directory
1820 	 */
1821 	if (amdgpu_vm_block_size == -1)
1822 		return;
1823 
1824 	if (amdgpu_vm_block_size < 9) {
1825 		dev_warn(adev->dev, "VM page table size (%d) too small\n",
1826 			 amdgpu_vm_block_size);
1827 		amdgpu_vm_block_size = -1;
1828 	}
1829 }
1830 
1831 /**
1832  * amdgpu_device_check_vm_size - validate the vm size
1833  *
1834  * @adev: amdgpu_device pointer
1835  *
1836  * Validates the vm size in GB specified via module parameter.
1837  * The VM size is the size of the GPU virtual memory space in GB.
1838  */
amdgpu_device_check_vm_size(struct amdgpu_device * adev)1839 static void amdgpu_device_check_vm_size(struct amdgpu_device *adev)
1840 {
1841 	/* no need to check the default value */
1842 	if (amdgpu_vm_size == -1)
1843 		return;
1844 
1845 	if (amdgpu_vm_size < 1) {
1846 		dev_warn(adev->dev, "VM size (%d) too small, min is 1GB\n",
1847 			 amdgpu_vm_size);
1848 		amdgpu_vm_size = -1;
1849 	}
1850 }
1851 
amdgpu_device_check_smu_prv_buffer_size(struct amdgpu_device * adev)1852 static void amdgpu_device_check_smu_prv_buffer_size(struct amdgpu_device *adev)
1853 {
1854 #ifdef __linux__
1855 	struct sysinfo si;
1856 #endif
1857 	bool is_os_64 = (sizeof(void *) == 8);
1858 	uint64_t total_memory;
1859 	uint64_t dram_size_seven_GB = 0x1B8000000;
1860 	uint64_t dram_size_three_GB = 0xB8000000;
1861 
1862 	if (amdgpu_smu_memory_pool_size == 0)
1863 		return;
1864 
1865 	if (!is_os_64) {
1866 		DRM_WARN("Not 64-bit OS, feature not supported\n");
1867 		goto def_value;
1868 	}
1869 #ifdef __linux__
1870 	si_meminfo(&si);
1871 	total_memory = (uint64_t)si.totalram * si.mem_unit;
1872 #else
1873 	total_memory = ptoa(physmem);
1874 #endif
1875 
1876 	if ((amdgpu_smu_memory_pool_size == 1) ||
1877 		(amdgpu_smu_memory_pool_size == 2)) {
1878 		if (total_memory < dram_size_three_GB)
1879 			goto def_value1;
1880 	} else if ((amdgpu_smu_memory_pool_size == 4) ||
1881 		(amdgpu_smu_memory_pool_size == 8)) {
1882 		if (total_memory < dram_size_seven_GB)
1883 			goto def_value1;
1884 	} else {
1885 		DRM_WARN("Smu memory pool size not supported\n");
1886 		goto def_value;
1887 	}
1888 	adev->pm.smu_prv_buffer_size = amdgpu_smu_memory_pool_size << 28;
1889 
1890 	return;
1891 
1892 def_value1:
1893 	DRM_WARN("No enough system memory\n");
1894 def_value:
1895 	adev->pm.smu_prv_buffer_size = 0;
1896 }
1897 
amdgpu_device_init_apu_flags(struct amdgpu_device * adev)1898 static int amdgpu_device_init_apu_flags(struct amdgpu_device *adev)
1899 {
1900 	if (!(adev->flags & AMD_IS_APU) ||
1901 	    adev->asic_type < CHIP_RAVEN)
1902 		return 0;
1903 
1904 	switch (adev->asic_type) {
1905 	case CHIP_RAVEN:
1906 		if (adev->pdev->device == 0x15dd)
1907 			adev->apu_flags |= AMD_APU_IS_RAVEN;
1908 		if (adev->pdev->device == 0x15d8)
1909 			adev->apu_flags |= AMD_APU_IS_PICASSO;
1910 		break;
1911 	case CHIP_RENOIR:
1912 		if ((adev->pdev->device == 0x1636) ||
1913 		    (adev->pdev->device == 0x164c))
1914 			adev->apu_flags |= AMD_APU_IS_RENOIR;
1915 		else
1916 			adev->apu_flags |= AMD_APU_IS_GREEN_SARDINE;
1917 		break;
1918 	case CHIP_VANGOGH:
1919 		adev->apu_flags |= AMD_APU_IS_VANGOGH;
1920 		break;
1921 	case CHIP_YELLOW_CARP:
1922 		break;
1923 	case CHIP_CYAN_SKILLFISH:
1924 		if ((adev->pdev->device == 0x13FE) ||
1925 		    (adev->pdev->device == 0x143F))
1926 			adev->apu_flags |= AMD_APU_IS_CYAN_SKILLFISH2;
1927 		break;
1928 	default:
1929 		break;
1930 	}
1931 
1932 	return 0;
1933 }
1934 
1935 /**
1936  * amdgpu_device_check_arguments - validate module params
1937  *
1938  * @adev: amdgpu_device pointer
1939  *
1940  * Validates certain module parameters and updates
1941  * the associated values used by the driver (all asics).
1942  */
amdgpu_device_check_arguments(struct amdgpu_device * adev)1943 static int amdgpu_device_check_arguments(struct amdgpu_device *adev)
1944 {
1945 	int i;
1946 
1947 	if (amdgpu_sched_jobs < 4) {
1948 		dev_warn(adev->dev, "sched jobs (%d) must be at least 4\n",
1949 			 amdgpu_sched_jobs);
1950 		amdgpu_sched_jobs = 4;
1951 	} else if (!is_power_of_2(amdgpu_sched_jobs)) {
1952 		dev_warn(adev->dev, "sched jobs (%d) must be a power of 2\n",
1953 			 amdgpu_sched_jobs);
1954 		amdgpu_sched_jobs = roundup_pow_of_two(amdgpu_sched_jobs);
1955 	}
1956 
1957 	if (amdgpu_gart_size != -1 && amdgpu_gart_size < 32) {
1958 		/* gart size must be greater or equal to 32M */
1959 		dev_warn(adev->dev, "gart size (%d) too small\n",
1960 			 amdgpu_gart_size);
1961 		amdgpu_gart_size = -1;
1962 	}
1963 
1964 	if (amdgpu_gtt_size != -1 && amdgpu_gtt_size < 32) {
1965 		/* gtt size must be greater or equal to 32M */
1966 		dev_warn(adev->dev, "gtt size (%d) too small\n",
1967 				 amdgpu_gtt_size);
1968 		amdgpu_gtt_size = -1;
1969 	}
1970 
1971 	/* valid range is between 4 and 9 inclusive */
1972 	if (amdgpu_vm_fragment_size != -1 &&
1973 	    (amdgpu_vm_fragment_size > 9 || amdgpu_vm_fragment_size < 4)) {
1974 		dev_warn(adev->dev, "valid range is between 4 and 9\n");
1975 		amdgpu_vm_fragment_size = -1;
1976 	}
1977 
1978 	if (amdgpu_sched_hw_submission < 2) {
1979 		dev_warn(adev->dev, "sched hw submission jobs (%d) must be at least 2\n",
1980 			 amdgpu_sched_hw_submission);
1981 		amdgpu_sched_hw_submission = 2;
1982 	} else if (!is_power_of_2(amdgpu_sched_hw_submission)) {
1983 		dev_warn(adev->dev, "sched hw submission jobs (%d) must be a power of 2\n",
1984 			 amdgpu_sched_hw_submission);
1985 		amdgpu_sched_hw_submission = roundup_pow_of_two(amdgpu_sched_hw_submission);
1986 	}
1987 
1988 	if (amdgpu_reset_method < -1 || amdgpu_reset_method > 4) {
1989 		dev_warn(adev->dev, "invalid option for reset method, reverting to default\n");
1990 		amdgpu_reset_method = -1;
1991 	}
1992 
1993 	amdgpu_device_check_smu_prv_buffer_size(adev);
1994 
1995 	amdgpu_device_check_vm_size(adev);
1996 
1997 	amdgpu_device_check_block_size(adev);
1998 
1999 	adev->firmware.load_type = amdgpu_ucode_get_load_type(adev, amdgpu_fw_load_type);
2000 
2001 	for (i = 0; i < MAX_XCP; i++)
2002 		adev->enforce_isolation[i] = !!enforce_isolation;
2003 
2004 	return 0;
2005 }
2006 
2007 #ifdef __linux__
2008 /**
2009  * amdgpu_switcheroo_set_state - set switcheroo state
2010  *
2011  * @pdev: pci dev pointer
2012  * @state: vga_switcheroo state
2013  *
2014  * Callback for the switcheroo driver.  Suspends or resumes
2015  * the asics before or after it is powered up using ACPI methods.
2016  */
amdgpu_switcheroo_set_state(struct pci_dev * pdev,enum vga_switcheroo_state state)2017 static void amdgpu_switcheroo_set_state(struct pci_dev *pdev,
2018 					enum vga_switcheroo_state state)
2019 {
2020 	struct drm_device *dev = pci_get_drvdata(pdev);
2021 	int r;
2022 
2023 	if (amdgpu_device_supports_px(dev) && state == VGA_SWITCHEROO_OFF)
2024 		return;
2025 
2026 	if (state == VGA_SWITCHEROO_ON) {
2027 		pr_info("switched on\n");
2028 		/* don't suspend or resume card normally */
2029 		dev->switch_power_state = DRM_SWITCH_POWER_CHANGING;
2030 
2031 		pci_set_power_state(pdev, PCI_D0);
2032 		amdgpu_device_load_pci_state(pdev);
2033 		r = pci_enable_device(pdev);
2034 		if (r)
2035 			DRM_WARN("pci_enable_device failed (%d)\n", r);
2036 		amdgpu_device_resume(dev, true);
2037 
2038 		dev->switch_power_state = DRM_SWITCH_POWER_ON;
2039 	} else {
2040 		pr_info("switched off\n");
2041 		dev->switch_power_state = DRM_SWITCH_POWER_CHANGING;
2042 		amdgpu_device_prepare(dev);
2043 		amdgpu_device_suspend(dev, true);
2044 		amdgpu_device_cache_pci_state(pdev);
2045 		/* Shut down the device */
2046 		pci_disable_device(pdev);
2047 		pci_set_power_state(pdev, PCI_D3cold);
2048 		dev->switch_power_state = DRM_SWITCH_POWER_OFF;
2049 	}
2050 }
2051 
2052 /**
2053  * amdgpu_switcheroo_can_switch - see if switcheroo state can change
2054  *
2055  * @pdev: pci dev pointer
2056  *
2057  * Callback for the switcheroo driver.  Check of the switcheroo
2058  * state can be changed.
2059  * Returns true if the state can be changed, false if not.
2060  */
amdgpu_switcheroo_can_switch(struct pci_dev * pdev)2061 static bool amdgpu_switcheroo_can_switch(struct pci_dev *pdev)
2062 {
2063 	struct drm_device *dev = pci_get_drvdata(pdev);
2064 
2065        /*
2066 	* FIXME: open_count is protected by drm_global_mutex but that would lead to
2067 	* locking inversion with the driver load path. And the access here is
2068 	* completely racy anyway. So don't bother with locking for now.
2069 	*/
2070 	return atomic_read(&dev->open_count) == 0;
2071 }
2072 #endif /* __linux__ */
2073 
/*
 * vga_switcheroo client callback table for amdgpu.
 *
 * Every member is compiled out unless "notyet" is defined, which leaves
 * the table empty.  The two callbacks named below are themselves only
 * defined inside an "#ifdef __linux__" section of this file.
 */
static const struct vga_switcheroo_client_ops amdgpu_switcheroo_ops = {
#ifdef notyet
	.set_gpu_state = amdgpu_switcheroo_set_state,
	.reprobe = NULL,
	.can_switch = amdgpu_switcheroo_can_switch,
#endif
};
2081 
2082 /**
2083  * amdgpu_device_ip_set_clockgating_state - set the CG state
2084  *
2085  * @dev: amdgpu_device pointer
2086  * @block_type: Type of hardware IP (SMU, GFX, UVD, etc.)
2087  * @state: clockgating state (gate or ungate)
2088  *
2089  * Sets the requested clockgating state for all instances of
2090  * the hardware IP specified.
2091  * Returns the error code from the last instance.
2092  */
amdgpu_device_ip_set_clockgating_state(void * dev,enum amd_ip_block_type block_type,enum amd_clockgating_state state)2093 int amdgpu_device_ip_set_clockgating_state(void *dev,
2094 					   enum amd_ip_block_type block_type,
2095 					   enum amd_clockgating_state state)
2096 {
2097 	struct amdgpu_device *adev = dev;
2098 	int i, r = 0;
2099 
2100 	for (i = 0; i < adev->num_ip_blocks; i++) {
2101 		if (!adev->ip_blocks[i].status.valid)
2102 			continue;
2103 		if (adev->ip_blocks[i].version->type != block_type)
2104 			continue;
2105 		if (!adev->ip_blocks[i].version->funcs->set_clockgating_state)
2106 			continue;
2107 		r = adev->ip_blocks[i].version->funcs->set_clockgating_state(
2108 			(void *)adev, state);
2109 		if (r)
2110 			DRM_ERROR("set_clockgating_state of IP block <%s> failed %d\n",
2111 				  adev->ip_blocks[i].version->funcs->name, r);
2112 	}
2113 	return r;
2114 }
2115 
2116 /**
2117  * amdgpu_device_ip_set_powergating_state - set the PG state
2118  *
2119  * @dev: amdgpu_device pointer
2120  * @block_type: Type of hardware IP (SMU, GFX, UVD, etc.)
2121  * @state: powergating state (gate or ungate)
2122  *
2123  * Sets the requested powergating state for all instances of
2124  * the hardware IP specified.
2125  * Returns the error code from the last instance.
2126  */
amdgpu_device_ip_set_powergating_state(void * dev,enum amd_ip_block_type block_type,enum amd_powergating_state state)2127 int amdgpu_device_ip_set_powergating_state(void *dev,
2128 					   enum amd_ip_block_type block_type,
2129 					   enum amd_powergating_state state)
2130 {
2131 	struct amdgpu_device *adev = dev;
2132 	int i, r = 0;
2133 
2134 	for (i = 0; i < adev->num_ip_blocks; i++) {
2135 		if (!adev->ip_blocks[i].status.valid)
2136 			continue;
2137 		if (adev->ip_blocks[i].version->type != block_type)
2138 			continue;
2139 		if (!adev->ip_blocks[i].version->funcs->set_powergating_state)
2140 			continue;
2141 		r = adev->ip_blocks[i].version->funcs->set_powergating_state(
2142 			(void *)adev, state);
2143 		if (r)
2144 			DRM_ERROR("set_powergating_state of IP block <%s> failed %d\n",
2145 				  adev->ip_blocks[i].version->funcs->name, r);
2146 	}
2147 	return r;
2148 }
2149 
2150 /**
2151  * amdgpu_device_ip_get_clockgating_state - get the CG state
2152  *
2153  * @adev: amdgpu_device pointer
2154  * @flags: clockgating feature flags
2155  *
2156  * Walks the list of IPs on the device and updates the clockgating
2157  * flags for each IP.
2158  * Updates @flags with the feature flags for each hardware IP where
2159  * clockgating is enabled.
2160  */
amdgpu_device_ip_get_clockgating_state(struct amdgpu_device * adev,u64 * flags)2161 void amdgpu_device_ip_get_clockgating_state(struct amdgpu_device *adev,
2162 					    u64 *flags)
2163 {
2164 	int i;
2165 
2166 	for (i = 0; i < adev->num_ip_blocks; i++) {
2167 		if (!adev->ip_blocks[i].status.valid)
2168 			continue;
2169 		if (adev->ip_blocks[i].version->funcs->get_clockgating_state)
2170 			adev->ip_blocks[i].version->funcs->get_clockgating_state((void *)adev, flags);
2171 	}
2172 }
2173 
2174 /**
2175  * amdgpu_device_ip_wait_for_idle - wait for idle
2176  *
2177  * @adev: amdgpu_device pointer
2178  * @block_type: Type of hardware IP (SMU, GFX, UVD, etc.)
2179  *
2180  * Waits for the request hardware IP to be idle.
2181  * Returns 0 for success or a negative error code on failure.
2182  */
amdgpu_device_ip_wait_for_idle(struct amdgpu_device * adev,enum amd_ip_block_type block_type)2183 int amdgpu_device_ip_wait_for_idle(struct amdgpu_device *adev,
2184 				   enum amd_ip_block_type block_type)
2185 {
2186 	int i, r;
2187 
2188 	for (i = 0; i < adev->num_ip_blocks; i++) {
2189 		if (!adev->ip_blocks[i].status.valid)
2190 			continue;
2191 		if (adev->ip_blocks[i].version->type == block_type) {
2192 			r = adev->ip_blocks[i].version->funcs->wait_for_idle((void *)adev);
2193 			if (r)
2194 				return r;
2195 			break;
2196 		}
2197 	}
2198 	return 0;
2199 
2200 }
2201 
2202 /**
2203  * amdgpu_device_ip_is_idle - is the hardware IP idle
2204  *
2205  * @adev: amdgpu_device pointer
2206  * @block_type: Type of hardware IP (SMU, GFX, UVD, etc.)
2207  *
2208  * Check if the hardware IP is idle or not.
2209  * Returns true if it the IP is idle, false if not.
2210  */
amdgpu_device_ip_is_idle(struct amdgpu_device * adev,enum amd_ip_block_type block_type)2211 bool amdgpu_device_ip_is_idle(struct amdgpu_device *adev,
2212 			      enum amd_ip_block_type block_type)
2213 {
2214 	int i;
2215 
2216 	for (i = 0; i < adev->num_ip_blocks; i++) {
2217 		if (!adev->ip_blocks[i].status.valid)
2218 			continue;
2219 		if (adev->ip_blocks[i].version->type == block_type)
2220 			return adev->ip_blocks[i].version->funcs->is_idle((void *)adev);
2221 	}
2222 	return true;
2223 
2224 }
2225 
2226 /**
2227  * amdgpu_device_ip_get_ip_block - get a hw IP pointer
2228  *
2229  * @adev: amdgpu_device pointer
2230  * @type: Type of hardware IP (SMU, GFX, UVD, etc.)
2231  *
2232  * Returns a pointer to the hardware IP block structure
2233  * if it exists for the asic, otherwise NULL.
2234  */
2235 struct amdgpu_ip_block *
amdgpu_device_ip_get_ip_block(struct amdgpu_device * adev,enum amd_ip_block_type type)2236 amdgpu_device_ip_get_ip_block(struct amdgpu_device *adev,
2237 			      enum amd_ip_block_type type)
2238 {
2239 	int i;
2240 
2241 	for (i = 0; i < adev->num_ip_blocks; i++)
2242 		if (adev->ip_blocks[i].version->type == type)
2243 			return &adev->ip_blocks[i];
2244 
2245 	return NULL;
2246 }
2247 
2248 /**
2249  * amdgpu_device_ip_block_version_cmp
2250  *
2251  * @adev: amdgpu_device pointer
2252  * @type: enum amd_ip_block_type
2253  * @major: major version
2254  * @minor: minor version
2255  *
2256  * return 0 if equal or greater
2257  * return 1 if smaller or the ip_block doesn't exist
2258  */
amdgpu_device_ip_block_version_cmp(struct amdgpu_device * adev,enum amd_ip_block_type type,u32 major,u32 minor)2259 int amdgpu_device_ip_block_version_cmp(struct amdgpu_device *adev,
2260 				       enum amd_ip_block_type type,
2261 				       u32 major, u32 minor)
2262 {
2263 	struct amdgpu_ip_block *ip_block = amdgpu_device_ip_get_ip_block(adev, type);
2264 
2265 	if (ip_block && ((ip_block->version->major > major) ||
2266 			((ip_block->version->major == major) &&
2267 			(ip_block->version->minor >= minor))))
2268 		return 0;
2269 
2270 	return 1;
2271 }
2272 
2273 /**
2274  * amdgpu_device_ip_block_add
2275  *
2276  * @adev: amdgpu_device pointer
2277  * @ip_block_version: pointer to the IP to add
2278  *
2279  * Adds the IP block driver information to the collection of IPs
2280  * on the asic.
2281  */
amdgpu_device_ip_block_add(struct amdgpu_device * adev,const struct amdgpu_ip_block_version * ip_block_version)2282 int amdgpu_device_ip_block_add(struct amdgpu_device *adev,
2283 			       const struct amdgpu_ip_block_version *ip_block_version)
2284 {
2285 	if (!ip_block_version)
2286 		return -EINVAL;
2287 
2288 	switch (ip_block_version->type) {
2289 	case AMD_IP_BLOCK_TYPE_VCN:
2290 		if (adev->harvest_ip_mask & AMD_HARVEST_IP_VCN_MASK)
2291 			return 0;
2292 		break;
2293 	case AMD_IP_BLOCK_TYPE_JPEG:
2294 		if (adev->harvest_ip_mask & AMD_HARVEST_IP_JPEG_MASK)
2295 			return 0;
2296 		break;
2297 	default:
2298 		break;
2299 	}
2300 
2301 	DRM_INFO("add ip block number %d <%s>\n", adev->num_ip_blocks,
2302 		  ip_block_version->funcs->name);
2303 
2304 	adev->ip_blocks[adev->num_ip_blocks++].version = ip_block_version;
2305 
2306 	return 0;
2307 }
2308 
/**
 * amdgpu_device_enable_virtual_display - enable virtual display feature
 *
 * @adev: amdgpu_device pointer
 *
 * Enables the virtual display feature if the user has enabled it via
 * the module parameter virtual_display.  This feature provides a virtual
 * display hardware on headless boards or in virtualized environments.
 * This function parses and validates the configuration string specified by
 * the user and configures the virtual display configuration (number of
 * virtual connectors, crtcs, etc.) specified.
 *
 * The parsing code is only built when "notyet" is defined; without it the
 * function just clears adev->enable_virtual_display.
 */
static void amdgpu_device_enable_virtual_display(struct amdgpu_device *adev)
{
	adev->enable_virtual_display = false;

#ifdef notyet
	if (amdgpu_virtual_display) {
		const char *pci_address_name = pci_name(adev->pdev);
		char *pciaddstr, *pciaddstr_tmp, *pciaddname_tmp, *pciaddname;

		/* work on a private copy: strsep() modifies the string */
		pciaddstr = kstrdup(amdgpu_virtual_display, GFP_KERNEL);
		pciaddstr_tmp = pciaddstr;
		/* entries are separated by ';', each is "<pci address>[,<num_crtc>]" */
		while ((pciaddname_tmp = strsep(&pciaddstr_tmp, ";"))) {
			pciaddname = strsep(&pciaddname_tmp, ",");
			/* "all" matches every device, otherwise compare the PCI name */
			if (!strcmp("all", pciaddname)
			    || !strcmp(pci_address_name, pciaddname)) {
				long num_crtc;
				int res = -1;

				adev->enable_virtual_display = true;

				/* the crtc count after the ',' is optional */
				if (pciaddname_tmp)
					res = kstrtol(pciaddname_tmp, 10,
						      &num_crtc);

				if (!res) {
					/* clamp the requested count to 1..6 */
					if (num_crtc < 1)
						num_crtc = 1;
					if (num_crtc > 6)
						num_crtc = 6;
					adev->mode_info.num_crtc = num_crtc;
				} else {
					/* missing or unparsable count: use one crtc */
					adev->mode_info.num_crtc = 1;
				}
				/* first matching entry wins */
				break;
			}
		}

		DRM_INFO("virtual display string:%s, %s:virtual_display:%d, num_crtc:%d\n",
			 amdgpu_virtual_display, pci_address_name,
			 adev->enable_virtual_display, adev->mode_info.num_crtc);

		kfree(pciaddstr);
	}
#endif
}
2366 
amdgpu_device_set_sriov_virtual_display(struct amdgpu_device * adev)2367 void amdgpu_device_set_sriov_virtual_display(struct amdgpu_device *adev)
2368 {
2369 	if (amdgpu_sriov_vf(adev) && !adev->enable_virtual_display) {
2370 		adev->mode_info.num_crtc = 1;
2371 		adev->enable_virtual_display = true;
2372 		DRM_INFO("virtual_display:%d, num_crtc:%d\n",
2373 			 adev->enable_virtual_display, adev->mode_info.num_crtc);
2374 	}
2375 }
2376 
/**
 * amdgpu_device_parse_gpu_info_fw - parse gpu info firmware
 *
 * @adev: amdgpu_device pointer
 *
 * Parses the asic configuration parameters specified in the gpu info
 * firmware and makes them available to the driver for use in configuring
 * the asic.
 * Returns 0 on success or when no gpu info firmware is used for the asic,
 * the error from amdgpu_ucode_request() if the firmware cannot be obtained,
 * or -EINVAL if the firmware table version is not supported.
 */
static int amdgpu_device_parse_gpu_info_fw(struct amdgpu_device *adev)
{
	const char *chip_name;
	int err;
	const struct gpu_info_firmware_header_v1_0 *hdr;

	adev->firmware.gpu_info_fw = NULL;

	/*
	 * Nothing to load when a discovery binary is present.
	 * NOTE(review): presumably the discovery table provides this
	 * configuration instead - confirm.
	 */
	if (adev->mman.discovery_bin)
		return 0;

	/* only the asics listed below have a gpu_info firmware file */
	switch (adev->asic_type) {
	default:
		return 0;
	case CHIP_VEGA10:
		chip_name = "vega10";
		break;
	case CHIP_VEGA12:
		chip_name = "vega12";
		break;
	case CHIP_RAVEN:
		if (adev->apu_flags & AMD_APU_IS_RAVEN2)
			chip_name = "raven2";
		else if (adev->apu_flags & AMD_APU_IS_PICASSO)
			chip_name = "picasso";
		else
			chip_name = "raven";
		break;
	case CHIP_ARCTURUS:
		chip_name = "arcturus";
		break;
	case CHIP_NAVI12:
		chip_name = "navi12";
		break;
	}

	err = amdgpu_ucode_request(adev, &adev->firmware.gpu_info_fw,
				   "amdgpu/%s_gpu_info.bin", chip_name);
	if (err) {
		dev_err(adev->dev,
			"Failed to get gpu_info firmware \"%s_gpu_info.bin\"\n",
			chip_name);
		goto out;
	}

	hdr = (const struct gpu_info_firmware_header_v1_0 *)adev->firmware.gpu_info_fw->data;
	amdgpu_ucode_print_gpu_info_hdr(&hdr->header);

	switch (hdr->version_major) {
	case 1:
	{
		/* the payload starts ucode_array_offset_bytes into the image */
		const struct gpu_info_firmware_v1_0 *gpu_info_fw =
			(const struct gpu_info_firmware_v1_0 *)(adev->firmware.gpu_info_fw->data +
								le32_to_cpu(hdr->header.ucode_array_offset_bytes));

		/*
		 * Navi12 only takes the soc bounding box from this firmware.
		 * Should be dropped when DAL no longer needs it.
		 */
		if (adev->asic_type == CHIP_NAVI12)
			goto parse_soc_bounding_box;

		/* copy the gfx configuration, converting from little endian */
		adev->gfx.config.max_shader_engines = le32_to_cpu(gpu_info_fw->gc_num_se);
		adev->gfx.config.max_cu_per_sh = le32_to_cpu(gpu_info_fw->gc_num_cu_per_sh);
		adev->gfx.config.max_sh_per_se = le32_to_cpu(gpu_info_fw->gc_num_sh_per_se);
		adev->gfx.config.max_backends_per_se = le32_to_cpu(gpu_info_fw->gc_num_rb_per_se);
		adev->gfx.config.max_texture_channel_caches =
			le32_to_cpu(gpu_info_fw->gc_num_tccs);
		adev->gfx.config.max_gprs = le32_to_cpu(gpu_info_fw->gc_num_gprs);
		adev->gfx.config.max_gs_threads = le32_to_cpu(gpu_info_fw->gc_num_max_gs_thds);
		adev->gfx.config.gs_vgt_table_depth = le32_to_cpu(gpu_info_fw->gc_gs_table_depth);
		adev->gfx.config.gs_prim_buffer_depth = le32_to_cpu(gpu_info_fw->gc_gsprim_buff_depth);
		adev->gfx.config.double_offchip_lds_buf =
			le32_to_cpu(gpu_info_fw->gc_double_offchip_lds_buffer);
		adev->gfx.cu_info.wave_front_size = le32_to_cpu(gpu_info_fw->gc_wave_size);
		adev->gfx.cu_info.max_waves_per_simd =
			le32_to_cpu(gpu_info_fw->gc_max_waves_per_simd);
		adev->gfx.cu_info.max_scratch_slots_per_cu =
			le32_to_cpu(gpu_info_fw->gc_max_scratch_slots_per_cu);
		adev->gfx.cu_info.lds_size = le32_to_cpu(gpu_info_fw->gc_lds_size);
		/* minor version 1 and later also carry the sc/packer counts */
		if (hdr->version_minor >= 1) {
			const struct gpu_info_firmware_v1_1 *gpu_info_fw =
				(const struct gpu_info_firmware_v1_1 *)(adev->firmware.gpu_info_fw->data +
									le32_to_cpu(hdr->header.ucode_array_offset_bytes));
			adev->gfx.config.num_sc_per_sh =
				le32_to_cpu(gpu_info_fw->num_sc_per_sh);
			adev->gfx.config.num_packer_per_sc =
				le32_to_cpu(gpu_info_fw->num_packer_per_sc);
		}

parse_soc_bounding_box:
		/*
		 * soc bounding box info is not integrated in discovery table,
		 * we always need to parse it from gpu info firmware if needed.
		 * The stored pointer refers into the firmware image itself,
		 * not to a copy.
		 */
		if (hdr->version_minor == 2) {
			const struct gpu_info_firmware_v1_2 *gpu_info_fw =
				(const struct gpu_info_firmware_v1_2 *)(adev->firmware.gpu_info_fw->data +
									le32_to_cpu(hdr->header.ucode_array_offset_bytes));
			adev->dm.soc_bounding_box = &gpu_info_fw->soc_bounding_box;
		}
		break;
	}
	default:
		dev_err(adev->dev,
			"Unsupported gpu_info table %d\n", hdr->header.ucode_version);
		err = -EINVAL;
		goto out;
	}
out:
	return err;
}
2498 
2499 /**
2500  * amdgpu_device_ip_early_init - run early init for hardware IPs
2501  *
2502  * @adev: amdgpu_device pointer
2503  *
2504  * Early initialization pass for hardware IPs.  The hardware IPs that make
2505  * up each asic are discovered each IP's early_init callback is run.  This
2506  * is the first stage in initializing the asic.
2507  * Returns 0 on success, negative error code on failure.
2508  */
amdgpu_device_ip_early_init(struct amdgpu_device * adev)2509 static int amdgpu_device_ip_early_init(struct amdgpu_device *adev)
2510 {
2511 	struct amdgpu_ip_block *ip_block;
2512 	struct pci_dev *parent;
2513 	int i, r;
2514 	bool total;
2515 
2516 	amdgpu_device_enable_virtual_display(adev);
2517 
2518 	if (amdgpu_sriov_vf(adev)) {
2519 		r = amdgpu_virt_request_full_gpu(adev, true);
2520 		if (r)
2521 			return r;
2522 	}
2523 
2524 	switch (adev->asic_type) {
2525 #ifdef CONFIG_DRM_AMDGPU_SI
2526 	case CHIP_VERDE:
2527 	case CHIP_TAHITI:
2528 	case CHIP_PITCAIRN:
2529 	case CHIP_OLAND:
2530 	case CHIP_HAINAN:
2531 		adev->family = AMDGPU_FAMILY_SI;
2532 		r = si_set_ip_blocks(adev);
2533 		if (r)
2534 			return r;
2535 		break;
2536 #endif
2537 #ifdef CONFIG_DRM_AMDGPU_CIK
2538 	case CHIP_BONAIRE:
2539 	case CHIP_HAWAII:
2540 	case CHIP_KAVERI:
2541 	case CHIP_KABINI:
2542 	case CHIP_MULLINS:
2543 		if (adev->flags & AMD_IS_APU)
2544 			adev->family = AMDGPU_FAMILY_KV;
2545 		else
2546 			adev->family = AMDGPU_FAMILY_CI;
2547 
2548 		r = cik_set_ip_blocks(adev);
2549 		if (r)
2550 			return r;
2551 		break;
2552 #endif
2553 	case CHIP_TOPAZ:
2554 	case CHIP_TONGA:
2555 	case CHIP_FIJI:
2556 	case CHIP_POLARIS10:
2557 	case CHIP_POLARIS11:
2558 	case CHIP_POLARIS12:
2559 	case CHIP_VEGAM:
2560 	case CHIP_CARRIZO:
2561 	case CHIP_STONEY:
2562 		if (adev->flags & AMD_IS_APU)
2563 			adev->family = AMDGPU_FAMILY_CZ;
2564 		else
2565 			adev->family = AMDGPU_FAMILY_VI;
2566 
2567 		r = vi_set_ip_blocks(adev);
2568 		if (r)
2569 			return r;
2570 		break;
2571 	default:
2572 		r = amdgpu_discovery_set_ip_blocks(adev);
2573 		if (r)
2574 			return r;
2575 		break;
2576 	}
2577 
2578 	if (amdgpu_has_atpx() &&
2579 	    (amdgpu_is_atpx_hybrid() ||
2580 	     amdgpu_has_atpx_dgpu_power_cntl()) &&
2581 	    ((adev->flags & AMD_IS_APU) == 0) &&
2582 	    !dev_is_removable(&adev->pdev->dev))
2583 		adev->flags |= AMD_IS_PX;
2584 
2585 	if (!(adev->flags & AMD_IS_APU)) {
2586 #ifdef notyet
2587 		parent = pcie_find_root_port(adev->pdev);
2588 		adev->has_pr3 = parent ? pci_pr3_present(parent) : false;
2589 #else
2590 		adev->has_pr3 = false;
2591 #endif
2592 	}
2593 
2594 
2595 	adev->pm.pp_feature = amdgpu_pp_feature_mask;
2596 	if (amdgpu_sriov_vf(adev) || sched_policy == KFD_SCHED_POLICY_NO_HWS)
2597 		adev->pm.pp_feature &= ~PP_GFXOFF_MASK;
2598 	if (amdgpu_sriov_vf(adev) && adev->asic_type == CHIP_SIENNA_CICHLID)
2599 		adev->pm.pp_feature &= ~PP_OVERDRIVE_MASK;
2600 	if (!amdgpu_device_pcie_dynamic_switching_supported(adev))
2601 		adev->pm.pp_feature &= ~PP_PCIE_DPM_MASK;
2602 
2603 	total = true;
2604 	for (i = 0; i < adev->num_ip_blocks; i++) {
2605 		if ((amdgpu_ip_block_mask & (1 << i)) == 0) {
2606 			DRM_WARN("disabled ip block: %d <%s>\n",
2607 				  i, adev->ip_blocks[i].version->funcs->name);
2608 			adev->ip_blocks[i].status.valid = false;
2609 		} else {
2610 			if (adev->ip_blocks[i].version->funcs->early_init) {
2611 				r = adev->ip_blocks[i].version->funcs->early_init((void *)adev);
2612 				if (r == -ENOENT) {
2613 					adev->ip_blocks[i].status.valid = false;
2614 				} else if (r) {
2615 					DRM_ERROR("early_init of IP block <%s> failed %d\n",
2616 						  adev->ip_blocks[i].version->funcs->name, r);
2617 					total = false;
2618 				} else {
2619 					adev->ip_blocks[i].status.valid = true;
2620 				}
2621 			} else {
2622 				adev->ip_blocks[i].status.valid = true;
2623 			}
2624 		}
2625 		/* get the vbios after the asic_funcs are set up */
2626 		if (adev->ip_blocks[i].version->type == AMD_IP_BLOCK_TYPE_COMMON) {
2627 			r = amdgpu_device_parse_gpu_info_fw(adev);
2628 			if (r)
2629 				return r;
2630 
2631 			/* Read BIOS */
2632 			if (amdgpu_device_read_bios(adev)) {
2633 				if (!amdgpu_get_bios(adev))
2634 					return -EINVAL;
2635 
2636 				r = amdgpu_atombios_init(adev);
2637 				if (r) {
2638 					dev_err(adev->dev, "amdgpu_atombios_init failed\n");
2639 					amdgpu_vf_error_put(adev, AMDGIM_ERROR_VF_ATOMBIOS_INIT_FAIL, 0, 0);
2640 					return r;
2641 				}
2642 			}
2643 
2644 			/*get pf2vf msg info at it's earliest time*/
2645 			if (amdgpu_sriov_vf(adev))
2646 				amdgpu_virt_init_data_exchange(adev);
2647 
2648 		}
2649 	}
2650 	if (!total)
2651 		return -ENODEV;
2652 
2653 	ip_block = amdgpu_device_ip_get_ip_block(adev, AMD_IP_BLOCK_TYPE_GFX);
2654 	if (ip_block->status.valid != false)
2655 		amdgpu_amdkfd_device_probe(adev);
2656 
2657 	adev->cg_flags &= amdgpu_cg_mask;
2658 	adev->pg_flags &= amdgpu_pg_mask;
2659 
2660 	return 0;
2661 }
2662 
amdgpu_device_ip_hw_init_phase1(struct amdgpu_device * adev)2663 static int amdgpu_device_ip_hw_init_phase1(struct amdgpu_device *adev)
2664 {
2665 	int i, r;
2666 
2667 	for (i = 0; i < adev->num_ip_blocks; i++) {
2668 		if (!adev->ip_blocks[i].status.sw)
2669 			continue;
2670 		if (adev->ip_blocks[i].status.hw)
2671 			continue;
2672 		if (adev->ip_blocks[i].version->type == AMD_IP_BLOCK_TYPE_COMMON ||
2673 		    (amdgpu_sriov_vf(adev) && (adev->ip_blocks[i].version->type == AMD_IP_BLOCK_TYPE_PSP)) ||
2674 		    adev->ip_blocks[i].version->type == AMD_IP_BLOCK_TYPE_IH) {
2675 			r = adev->ip_blocks[i].version->funcs->hw_init(adev);
2676 			if (r) {
2677 				DRM_ERROR("hw_init of IP block <%s> failed %d\n",
2678 					  adev->ip_blocks[i].version->funcs->name, r);
2679 				return r;
2680 			}
2681 			adev->ip_blocks[i].status.hw = true;
2682 		}
2683 	}
2684 
2685 	return 0;
2686 }
2687 
amdgpu_device_ip_hw_init_phase2(struct amdgpu_device * adev)2688 static int amdgpu_device_ip_hw_init_phase2(struct amdgpu_device *adev)
2689 {
2690 	int i, r;
2691 
2692 	for (i = 0; i < adev->num_ip_blocks; i++) {
2693 		if (!adev->ip_blocks[i].status.sw)
2694 			continue;
2695 		if (adev->ip_blocks[i].status.hw)
2696 			continue;
2697 		r = adev->ip_blocks[i].version->funcs->hw_init(adev);
2698 		if (r) {
2699 			DRM_ERROR("hw_init of IP block <%s> failed %d\n",
2700 				  adev->ip_blocks[i].version->funcs->name, r);
2701 			return r;
2702 		}
2703 		adev->ip_blocks[i].status.hw = true;
2704 	}
2705 
2706 	return 0;
2707 }
2708 
amdgpu_device_fw_loading(struct amdgpu_device * adev)2709 static int amdgpu_device_fw_loading(struct amdgpu_device *adev)
2710 {
2711 	int r = 0;
2712 	int i;
2713 	uint32_t smu_version;
2714 
2715 	if (adev->asic_type >= CHIP_VEGA10) {
2716 		for (i = 0; i < adev->num_ip_blocks; i++) {
2717 			if (adev->ip_blocks[i].version->type != AMD_IP_BLOCK_TYPE_PSP)
2718 				continue;
2719 
2720 			if (!adev->ip_blocks[i].status.sw)
2721 				continue;
2722 
2723 			/* no need to do the fw loading again if already done*/
2724 			if (adev->ip_blocks[i].status.hw == true)
2725 				break;
2726 
2727 			if (amdgpu_in_reset(adev) || adev->in_suspend) {
2728 				r = adev->ip_blocks[i].version->funcs->resume(adev);
2729 				if (r) {
2730 					DRM_ERROR("resume of IP block <%s> failed %d\n",
2731 							  adev->ip_blocks[i].version->funcs->name, r);
2732 					return r;
2733 				}
2734 			} else {
2735 				r = adev->ip_blocks[i].version->funcs->hw_init(adev);
2736 				if (r) {
2737 					DRM_ERROR("hw_init of IP block <%s> failed %d\n",
2738 							  adev->ip_blocks[i].version->funcs->name, r);
2739 					return r;
2740 				}
2741 			}
2742 
2743 			adev->ip_blocks[i].status.hw = true;
2744 			break;
2745 		}
2746 	}
2747 
2748 	if (!amdgpu_sriov_vf(adev) || adev->asic_type == CHIP_TONGA)
2749 		r = amdgpu_pm_load_smu_firmware(adev, &smu_version);
2750 
2751 	return r;
2752 }
2753 
amdgpu_device_init_schedulers(struct amdgpu_device * adev)2754 static int amdgpu_device_init_schedulers(struct amdgpu_device *adev)
2755 {
2756 	long timeout;
2757 	int r, i;
2758 
2759 	for (i = 0; i < AMDGPU_MAX_RINGS; ++i) {
2760 		struct amdgpu_ring *ring = adev->rings[i];
2761 
2762 		/* No need to setup the GPU scheduler for rings that don't need it */
2763 		if (!ring || ring->no_scheduler)
2764 			continue;
2765 
2766 		switch (ring->funcs->type) {
2767 		case AMDGPU_RING_TYPE_GFX:
2768 			timeout = adev->gfx_timeout;
2769 			break;
2770 		case AMDGPU_RING_TYPE_COMPUTE:
2771 			timeout = adev->compute_timeout;
2772 			break;
2773 		case AMDGPU_RING_TYPE_SDMA:
2774 			timeout = adev->sdma_timeout;
2775 			break;
2776 		default:
2777 			timeout = adev->video_timeout;
2778 			break;
2779 		}
2780 
2781 		r = drm_sched_init(&ring->sched, &amdgpu_sched_ops, NULL,
2782 				   DRM_SCHED_PRIORITY_COUNT,
2783 				   ring->num_hw_submission, 0,
2784 				   timeout, adev->reset_domain->wq,
2785 				   ring->sched_score, ring->name,
2786 				   adev->dev);
2787 		if (r) {
2788 			DRM_ERROR("Failed to create scheduler on ring %s.\n",
2789 				  ring->name);
2790 			return r;
2791 		}
2792 		r = amdgpu_uvd_entity_init(adev, ring);
2793 		if (r) {
2794 			DRM_ERROR("Failed to create UVD scheduling entity on ring %s.\n",
2795 				  ring->name);
2796 			return r;
2797 		}
2798 		r = amdgpu_vce_entity_init(adev, ring);
2799 		if (r) {
2800 			DRM_ERROR("Failed to create VCE scheduling entity on ring %s.\n",
2801 				  ring->name);
2802 			return r;
2803 		}
2804 	}
2805 
2806 	amdgpu_xcp_update_partition_sched_list(adev);
2807 
2808 	return 0;
2809 }
2810 
2811 
/**
 * amdgpu_device_ip_init - run init for hardware IPs
 *
 * @adev: amdgpu_device pointer
 *
 * Main initialization pass for hardware IPs.  RAS is initialized first, then
 * the list of IP blocks is walked and sw_init is run for every valid block.
 * During that walk the COMMON and GMC blocks additionally get their hw_init
 * run right away; the GMC step also sets up scratch memory, writeback, the
 * static CSA (only when adev->gfx.mcbp is set) and seq64.  After the walk the
 * IB pool and ucode BO are created and the remaining bring-up is delegated to
 * hw init phase 1, firmware loading and hw init phase 2, followed by RAS
 * recovery, XGMI hive registration, scheduler creation and KFD init.
 *
 * The first failing step aborts the sequence and its error is returned; this
 * function itself performs no teardown of the steps that already succeeded.
 *
 * Returns 0 on success, negative error code on failure.
 */
static int amdgpu_device_ip_init(struct amdgpu_device *adev)
{
	int i, r;

	r = amdgpu_ras_init(adev);
	if (r)
		return r;

	/* sw_init every valid block; COMMON and GMC also get hw_init right here */
	for (i = 0; i < adev->num_ip_blocks; i++) {
		if (!adev->ip_blocks[i].status.valid)
			continue;
		r = adev->ip_blocks[i].version->funcs->sw_init((void *)adev);
		if (r) {
			DRM_ERROR("sw_init of IP block <%s> failed %d\n",
				  adev->ip_blocks[i].version->funcs->name, r);
			goto init_failed;
		}
		adev->ip_blocks[i].status.sw = true;

		if (adev->ip_blocks[i].version->type == AMD_IP_BLOCK_TYPE_COMMON) {
			/* need to do common hw init early so everything is set up for gmc */
			r = adev->ip_blocks[i].version->funcs->hw_init((void *)adev);
			if (r) {
				DRM_ERROR("hw_init %d failed %d\n", i, r);
				goto init_failed;
			}
			adev->ip_blocks[i].status.hw = true;
		} else if (adev->ip_blocks[i].version->type == AMD_IP_BLOCK_TYPE_GMC) {
			/* need to do gmc hw init early so we can allocate gpu mem */
			/* Try to reserve bad pages early */
			if (amdgpu_sriov_vf(adev))
				amdgpu_virt_exchange_data(adev);

			r = amdgpu_device_mem_scratch_init(adev);
			if (r) {
				DRM_ERROR("amdgpu_mem_scratch_init failed %d\n", r);
				goto init_failed;
			}
			r = adev->ip_blocks[i].version->funcs->hw_init((void *)adev);
			if (r) {
				DRM_ERROR("hw_init %d failed %d\n", i, r);
				goto init_failed;
			}
			r = amdgpu_device_wb_init(adev);
			if (r) {
				DRM_ERROR("amdgpu_device_wb_init failed %d\n", r);
				goto init_failed;
			}
			/* status.hw is only set once writeback init has succeeded too */
			adev->ip_blocks[i].status.hw = true;

			/* right after GMC hw init, we create CSA */
			if (adev->gfx.mcbp) {
				r = amdgpu_allocate_static_csa(adev, &adev->virt.csa_obj,
							       AMDGPU_GEM_DOMAIN_VRAM |
							       AMDGPU_GEM_DOMAIN_GTT,
							       AMDGPU_CSA_SIZE);
				if (r) {
					DRM_ERROR("allocate CSA failed %d\n", r);
					goto init_failed;
				}
			}

			r = amdgpu_seq64_init(adev);
			if (r) {
				DRM_ERROR("allocate seq64 failed %d\n", r);
				goto init_failed;
			}
		}
	}

	if (amdgpu_sriov_vf(adev))
		amdgpu_virt_init_data_exchange(adev);

	r = amdgpu_ib_pool_init(adev);
	if (r) {
		dev_err(adev->dev, "IB initialization failed (%d).\n", r);
		amdgpu_vf_error_put(adev, AMDGIM_ERROR_VF_IB_INIT_FAIL, 0, r);
		goto init_failed;
	}

	r = amdgpu_ucode_create_bo(adev); /* create ucode bo once sw_init is complete */
	if (r)
		goto init_failed;

	r = amdgpu_device_ip_hw_init_phase1(adev);
	if (r)
		goto init_failed;

	r = amdgpu_device_fw_loading(adev);
	if (r)
		goto init_failed;

	r = amdgpu_device_ip_hw_init_phase2(adev);
	if (r)
		goto init_failed;

	/*
	 * Retired pages are loaded from the EEPROM and reserved here.  This
	 * must be called after amdgpu_device_ip_hw_init_phase2 because on
	 * some ASICs the RAS EEPROM code relies on a fully functioning SMU
	 * for I2C communication, which is only the case from this point on.
	 *
	 * amdgpu_ras_recovery_init may fail, but the caller only cares about
	 * failures caused by a bad GPU and stops the amdgpu init process
	 * accordingly.  For other failures it still releases all resources
	 * and prints an error message rather than returning a negative
	 * value to the upper level.
	 *
	 * Note: theoretically, this should be called before any VRAM
	 * allocation so that retired pages are never handed out.
	 */
	r = amdgpu_ras_recovery_init(adev);
	if (r)
		goto init_failed;

	/*
	 * In case of XGMI grab extra reference for reset domain for this device
	 */
	if (adev->gmc.xgmi.num_physical_nodes > 1) {
		/* a non-zero result from amdgpu_xgmi_add_device() is not treated as fatal */
		if (amdgpu_xgmi_add_device(adev) == 0) {
			if (!amdgpu_sriov_vf(adev)) {
				struct amdgpu_hive_info *hive = amdgpu_get_xgmi_hive(adev);

				if (WARN_ON(!hive)) {
					r = -ENOENT;
					goto init_failed;
				}

				if (!hive->reset_domain ||
				    !amdgpu_reset_get_reset_domain(hive->reset_domain)) {
					r = -ENOENT;
					amdgpu_put_xgmi_hive(hive);
					goto init_failed;
				}

				/* Drop the early temporary reset domain we created for device */
				amdgpu_reset_put_reset_domain(adev->reset_domain);
				adev->reset_domain = hive->reset_domain;
				amdgpu_put_xgmi_hive(hive);
			}
		}
	}

	r = amdgpu_device_init_schedulers(adev);
	if (r)
		goto init_failed;

	if (adev->mman.buffer_funcs_ring->sched.ready)
		amdgpu_ttm_set_buffer_funcs_status(adev, true);

	/* Don't init kfd if whole hive need to be reset during init */
	if (!adev->gmc.xgmi.pending_reset) {
		kgd2kfd_init_zone_device(adev);
		amdgpu_amdkfd_device_init(adev);
	}

	amdgpu_fru_get_product_info(adev);

	/* success falls through with r == 0 from the scheduler init above */
init_failed:

	return r;
}
2985 
2986 /**
2987  * amdgpu_device_fill_reset_magic - writes reset magic to gart pointer
2988  *
2989  * @adev: amdgpu_device pointer
2990  *
2991  * Writes a reset magic value to the gart pointer in VRAM.  The driver calls
2992  * this function before a GPU reset.  If the value is retained after a
2993  * GPU reset, VRAM has not been lost.  Some GPU resets may destry VRAM contents.
2994  */
amdgpu_device_fill_reset_magic(struct amdgpu_device * adev)2995 static void amdgpu_device_fill_reset_magic(struct amdgpu_device *adev)
2996 {
2997 	memcpy(adev->reset_magic, adev->gart.ptr, AMDGPU_RESET_MAGIC_NUM);
2998 }
2999 
3000 /**
3001  * amdgpu_device_check_vram_lost - check if vram is valid
3002  *
3003  * @adev: amdgpu_device pointer
3004  *
3005  * Checks the reset magic value written to the gart pointer in VRAM.
3006  * The driver calls this after a GPU reset to see if the contents of
3007  * VRAM is lost or now.
3008  * returns true if vram is lost, false if not.
3009  */
amdgpu_device_check_vram_lost(struct amdgpu_device * adev)3010 static bool amdgpu_device_check_vram_lost(struct amdgpu_device *adev)
3011 {
3012 	if (memcmp(adev->gart.ptr, adev->reset_magic,
3013 			AMDGPU_RESET_MAGIC_NUM))
3014 		return true;
3015 
3016 	if (!amdgpu_in_reset(adev))
3017 		return false;
3018 
3019 	/*
3020 	 * For all ASICs with baco/mode1 reset, the VRAM is
3021 	 * always assumed to be lost.
3022 	 */
3023 	switch (amdgpu_asic_reset_method(adev)) {
3024 	case AMD_RESET_METHOD_BACO:
3025 	case AMD_RESET_METHOD_MODE1:
3026 		return true;
3027 	default:
3028 		return false;
3029 	}
3030 }
3031 
3032 /**
3033  * amdgpu_device_set_cg_state - set clockgating for amdgpu device
3034  *
3035  * @adev: amdgpu_device pointer
3036  * @state: clockgating state (gate or ungate)
3037  *
3038  * The list of all the hardware IPs that make up the asic is walked and the
3039  * set_clockgating_state callbacks are run.
3040  * Late initialization pass enabling clockgating for hardware IPs.
3041  * Fini or suspend, pass disabling clockgating for hardware IPs.
3042  * Returns 0 on success, negative error code on failure.
3043  */
3044 
amdgpu_device_set_cg_state(struct amdgpu_device * adev,enum amd_clockgating_state state)3045 int amdgpu_device_set_cg_state(struct amdgpu_device *adev,
3046 			       enum amd_clockgating_state state)
3047 {
3048 	int i, j, r;
3049 
3050 	if (amdgpu_emu_mode == 1)
3051 		return 0;
3052 
3053 	for (j = 0; j < adev->num_ip_blocks; j++) {
3054 		i = state == AMD_CG_STATE_GATE ? j : adev->num_ip_blocks - j - 1;
3055 		if (!adev->ip_blocks[i].status.late_initialized)
3056 			continue;
3057 		/* skip CG for GFX, SDMA on S0ix */
3058 		if (adev->in_s0ix &&
3059 		    (adev->ip_blocks[i].version->type == AMD_IP_BLOCK_TYPE_GFX ||
3060 		     adev->ip_blocks[i].version->type == AMD_IP_BLOCK_TYPE_SDMA))
3061 			continue;
3062 		/* skip CG for VCE/UVD, it's handled specially */
3063 		if (adev->ip_blocks[i].version->type != AMD_IP_BLOCK_TYPE_UVD &&
3064 		    adev->ip_blocks[i].version->type != AMD_IP_BLOCK_TYPE_VCE &&
3065 		    adev->ip_blocks[i].version->type != AMD_IP_BLOCK_TYPE_VCN &&
3066 		    adev->ip_blocks[i].version->type != AMD_IP_BLOCK_TYPE_JPEG &&
3067 		    adev->ip_blocks[i].version->funcs->set_clockgating_state) {
3068 			/* enable clockgating to save power */
3069 			r = adev->ip_blocks[i].version->funcs->set_clockgating_state((void *)adev,
3070 										     state);
3071 			if (r) {
3072 				DRM_ERROR("set_clockgating_state(gate) of IP block <%s> failed %d\n",
3073 					  adev->ip_blocks[i].version->funcs->name, r);
3074 				return r;
3075 			}
3076 		}
3077 	}
3078 
3079 	return 0;
3080 }
3081 
amdgpu_device_set_pg_state(struct amdgpu_device * adev,enum amd_powergating_state state)3082 int amdgpu_device_set_pg_state(struct amdgpu_device *adev,
3083 			       enum amd_powergating_state state)
3084 {
3085 	int i, j, r;
3086 
3087 	if (amdgpu_emu_mode == 1)
3088 		return 0;
3089 
3090 	for (j = 0; j < adev->num_ip_blocks; j++) {
3091 		i = state == AMD_PG_STATE_GATE ? j : adev->num_ip_blocks - j - 1;
3092 		if (!adev->ip_blocks[i].status.late_initialized)
3093 			continue;
3094 		/* skip PG for GFX, SDMA on S0ix */
3095 		if (adev->in_s0ix &&
3096 		    (adev->ip_blocks[i].version->type == AMD_IP_BLOCK_TYPE_GFX ||
3097 		     adev->ip_blocks[i].version->type == AMD_IP_BLOCK_TYPE_SDMA))
3098 			continue;
3099 		/* skip CG for VCE/UVD, it's handled specially */
3100 		if (adev->ip_blocks[i].version->type != AMD_IP_BLOCK_TYPE_UVD &&
3101 		    adev->ip_blocks[i].version->type != AMD_IP_BLOCK_TYPE_VCE &&
3102 		    adev->ip_blocks[i].version->type != AMD_IP_BLOCK_TYPE_VCN &&
3103 		    adev->ip_blocks[i].version->type != AMD_IP_BLOCK_TYPE_JPEG &&
3104 		    adev->ip_blocks[i].version->funcs->set_powergating_state) {
3105 			/* enable powergating to save power */
3106 			r = adev->ip_blocks[i].version->funcs->set_powergating_state((void *)adev,
3107 											state);
3108 			if (r) {
3109 				DRM_ERROR("set_powergating_state(gate) of IP block <%s> failed %d\n",
3110 					  adev->ip_blocks[i].version->funcs->name, r);
3111 				return r;
3112 			}
3113 		}
3114 	}
3115 	return 0;
3116 }
3117 
amdgpu_device_enable_mgpu_fan_boost(void)3118 static int amdgpu_device_enable_mgpu_fan_boost(void)
3119 {
3120 	struct amdgpu_gpu_instance *gpu_ins;
3121 	struct amdgpu_device *adev;
3122 	int i, ret = 0;
3123 
3124 	mutex_lock(&mgpu_info.mutex);
3125 
3126 	/*
3127 	 * MGPU fan boost feature should be enabled
3128 	 * only when there are two or more dGPUs in
3129 	 * the system
3130 	 */
3131 	if (mgpu_info.num_dgpu < 2)
3132 		goto out;
3133 
3134 	for (i = 0; i < mgpu_info.num_dgpu; i++) {
3135 		gpu_ins = &(mgpu_info.gpu_ins[i]);
3136 		adev = gpu_ins->adev;
3137 		if (!(adev->flags & AMD_IS_APU) &&
3138 		    !gpu_ins->mgpu_fan_enabled) {
3139 			ret = amdgpu_dpm_enable_mgpu_fan_boost(adev);
3140 			if (ret)
3141 				break;
3142 
3143 			gpu_ins->mgpu_fan_enabled = 1;
3144 		}
3145 	}
3146 
3147 out:
3148 	mutex_unlock(&mgpu_info.mutex);
3149 
3150 	return ret;
3151 }
3152 
3153 /**
3154  * amdgpu_device_ip_late_init - run late init for hardware IPs
3155  *
3156  * @adev: amdgpu_device pointer
3157  *
3158  * Late initialization pass for hardware IPs.  The list of all the hardware
3159  * IPs that make up the asic is walked and the late_init callbacks are run.
3160  * late_init covers any special initialization that an IP requires
3161  * after all of the have been initialized or something that needs to happen
3162  * late in the init process.
3163  * Returns 0 on success, negative error code on failure.
3164  */
amdgpu_device_ip_late_init(struct amdgpu_device * adev)3165 static int amdgpu_device_ip_late_init(struct amdgpu_device *adev)
3166 {
3167 	struct amdgpu_gpu_instance *gpu_instance;
3168 	int i = 0, r;
3169 
3170 	for (i = 0; i < adev->num_ip_blocks; i++) {
3171 		if (!adev->ip_blocks[i].status.hw)
3172 			continue;
3173 		if (adev->ip_blocks[i].version->funcs->late_init) {
3174 			r = adev->ip_blocks[i].version->funcs->late_init((void *)adev);
3175 			if (r) {
3176 				DRM_ERROR("late_init of IP block <%s> failed %d\n",
3177 					  adev->ip_blocks[i].version->funcs->name, r);
3178 				return r;
3179 			}
3180 		}
3181 		adev->ip_blocks[i].status.late_initialized = true;
3182 	}
3183 
3184 	r = amdgpu_ras_late_init(adev);
3185 	if (r) {
3186 		DRM_ERROR("amdgpu_ras_late_init failed %d", r);
3187 		return r;
3188 	}
3189 
3190 	if (!amdgpu_in_reset(adev))
3191 		amdgpu_ras_set_error_query_ready(adev, true);
3192 
3193 	amdgpu_device_set_cg_state(adev, AMD_CG_STATE_GATE);
3194 	amdgpu_device_set_pg_state(adev, AMD_PG_STATE_GATE);
3195 
3196 	amdgpu_device_fill_reset_magic(adev);
3197 
3198 	r = amdgpu_device_enable_mgpu_fan_boost();
3199 	if (r)
3200 		DRM_ERROR("enable mgpu fan boost failed (%d).\n", r);
3201 
3202 	/* For passthrough configuration on arcturus and aldebaran, enable special handling SBR */
3203 	if (amdgpu_passthrough(adev) &&
3204 	    ((adev->asic_type == CHIP_ARCTURUS && adev->gmc.xgmi.num_physical_nodes > 1) ||
3205 	     adev->asic_type == CHIP_ALDEBARAN))
3206 		amdgpu_dpm_handle_passthrough_sbr(adev, true);
3207 
3208 	if (adev->gmc.xgmi.num_physical_nodes > 1) {
3209 		mutex_lock(&mgpu_info.mutex);
3210 
3211 		/*
3212 		 * Reset device p-state to low as this was booted with high.
3213 		 *
3214 		 * This should be performed only after all devices from the same
3215 		 * hive get initialized.
3216 		 *
3217 		 * However, it's unknown how many device in the hive in advance.
3218 		 * As this is counted one by one during devices initializations.
3219 		 *
3220 		 * So, we wait for all XGMI interlinked devices initialized.
3221 		 * This may bring some delays as those devices may come from
3222 		 * different hives. But that should be OK.
3223 		 */
3224 		if (mgpu_info.num_dgpu == adev->gmc.xgmi.num_physical_nodes) {
3225 			for (i = 0; i < mgpu_info.num_gpu; i++) {
3226 				gpu_instance = &(mgpu_info.gpu_ins[i]);
3227 				if (gpu_instance->adev->flags & AMD_IS_APU)
3228 					continue;
3229 
3230 				r = amdgpu_xgmi_set_pstate(gpu_instance->adev,
3231 						AMDGPU_XGMI_PSTATE_MIN);
3232 				if (r) {
3233 					DRM_ERROR("pstate setting failed (%d).\n", r);
3234 					break;
3235 				}
3236 			}
3237 		}
3238 
3239 		mutex_unlock(&mgpu_info.mutex);
3240 	}
3241 
3242 	return 0;
3243 }
3244 
3245 /**
3246  * amdgpu_device_smu_fini_early - smu hw_fini wrapper
3247  *
3248  * @adev: amdgpu_device pointer
3249  *
3250  * For ASICs need to disable SMC first
3251  */
amdgpu_device_smu_fini_early(struct amdgpu_device * adev)3252 static void amdgpu_device_smu_fini_early(struct amdgpu_device *adev)
3253 {
3254 	int i, r;
3255 
3256 	if (amdgpu_ip_version(adev, GC_HWIP, 0) > IP_VERSION(9, 0, 0))
3257 		return;
3258 
3259 	for (i = 0; i < adev->num_ip_blocks; i++) {
3260 		if (!adev->ip_blocks[i].status.hw)
3261 			continue;
3262 		if (adev->ip_blocks[i].version->type == AMD_IP_BLOCK_TYPE_SMC) {
3263 			r = adev->ip_blocks[i].version->funcs->hw_fini((void *)adev);
3264 			/* XXX handle errors */
3265 			if (r) {
3266 				DRM_DEBUG("hw_fini of IP block <%s> failed %d\n",
3267 					  adev->ip_blocks[i].version->funcs->name, r);
3268 			}
3269 			adev->ip_blocks[i].status.hw = false;
3270 			break;
3271 		}
3272 	}
3273 }
3274 
amdgpu_device_ip_fini_early(struct amdgpu_device * adev)3275 static int amdgpu_device_ip_fini_early(struct amdgpu_device *adev)
3276 {
3277 	int i, r;
3278 
3279 	for (i = 0; i < adev->num_ip_blocks; i++) {
3280 		if (!adev->ip_blocks[i].version->funcs->early_fini)
3281 			continue;
3282 
3283 		r = adev->ip_blocks[i].version->funcs->early_fini((void *)adev);
3284 		if (r) {
3285 			DRM_DEBUG("early_fini of IP block <%s> failed %d\n",
3286 				  adev->ip_blocks[i].version->funcs->name, r);
3287 		}
3288 	}
3289 
3290 	amdgpu_device_set_pg_state(adev, AMD_PG_STATE_UNGATE);
3291 	amdgpu_device_set_cg_state(adev, AMD_CG_STATE_UNGATE);
3292 
3293 	amdgpu_amdkfd_suspend(adev, false);
3294 
3295 	/* Workaroud for ASICs need to disable SMC first */
3296 	amdgpu_device_smu_fini_early(adev);
3297 
3298 	for (i = adev->num_ip_blocks - 1; i >= 0; i--) {
3299 		if (!adev->ip_blocks[i].status.hw)
3300 			continue;
3301 
3302 		r = adev->ip_blocks[i].version->funcs->hw_fini((void *)adev);
3303 		/* XXX handle errors */
3304 		if (r) {
3305 			DRM_DEBUG("hw_fini of IP block <%s> failed %d\n",
3306 				  adev->ip_blocks[i].version->funcs->name, r);
3307 		}
3308 
3309 		adev->ip_blocks[i].status.hw = false;
3310 	}
3311 
3312 	if (amdgpu_sriov_vf(adev)) {
3313 		if (amdgpu_virt_release_full_gpu(adev, false))
3314 			DRM_ERROR("failed to release exclusive mode on fini\n");
3315 	}
3316 
3317 	return 0;
3318 }
3319 
3320 /**
3321  * amdgpu_device_ip_fini - run fini for hardware IPs
3322  *
3323  * @adev: amdgpu_device pointer
3324  *
3325  * Main teardown pass for hardware IPs.  The list of all the hardware
3326  * IPs that make up the asic is walked and the hw_fini and sw_fini callbacks
3327  * are run.  hw_fini tears down the hardware associated with each IP
3328  * and sw_fini tears down any software state associated with each IP.
3329  * Returns 0 on success, negative error code on failure.
3330  */
amdgpu_device_ip_fini(struct amdgpu_device * adev)3331 static int amdgpu_device_ip_fini(struct amdgpu_device *adev)
3332 {
3333 	int i, r;
3334 
3335 	if (amdgpu_sriov_vf(adev) && adev->virt.ras_init_done)
3336 		amdgpu_virt_release_ras_err_handler_data(adev);
3337 
3338 	if (adev->gmc.xgmi.num_physical_nodes > 1)
3339 		amdgpu_xgmi_remove_device(adev);
3340 
3341 	amdgpu_amdkfd_device_fini_sw(adev);
3342 
3343 	for (i = adev->num_ip_blocks - 1; i >= 0; i--) {
3344 		if (!adev->ip_blocks[i].status.sw)
3345 			continue;
3346 
3347 		if (adev->ip_blocks[i].version->type == AMD_IP_BLOCK_TYPE_GMC) {
3348 			amdgpu_ucode_free_bo(adev);
3349 			amdgpu_free_static_csa(&adev->virt.csa_obj);
3350 			amdgpu_device_wb_fini(adev);
3351 			amdgpu_device_mem_scratch_fini(adev);
3352 			amdgpu_ib_pool_fini(adev);
3353 			amdgpu_seq64_fini(adev);
3354 		}
3355 
3356 		r = adev->ip_blocks[i].version->funcs->sw_fini((void *)adev);
3357 		/* XXX handle errors */
3358 		if (r) {
3359 			DRM_DEBUG("sw_fini of IP block <%s> failed %d\n",
3360 				  adev->ip_blocks[i].version->funcs->name, r);
3361 		}
3362 		adev->ip_blocks[i].status.sw = false;
3363 		adev->ip_blocks[i].status.valid = false;
3364 	}
3365 
3366 	for (i = adev->num_ip_blocks - 1; i >= 0; i--) {
3367 		if (!adev->ip_blocks[i].status.late_initialized)
3368 			continue;
3369 		if (adev->ip_blocks[i].version->funcs->late_fini)
3370 			adev->ip_blocks[i].version->funcs->late_fini((void *)adev);
3371 		adev->ip_blocks[i].status.late_initialized = false;
3372 	}
3373 
3374 	amdgpu_ras_fini(adev);
3375 
3376 	return 0;
3377 }
3378 
3379 /**
3380  * amdgpu_device_delayed_init_work_handler - work handler for IB tests
3381  *
3382  * @work: work_struct.
3383  */
amdgpu_device_delayed_init_work_handler(struct work_struct * work)3384 static void amdgpu_device_delayed_init_work_handler(struct work_struct *work)
3385 {
3386 	struct amdgpu_device *adev =
3387 		container_of(work, struct amdgpu_device, delayed_init_work.work);
3388 	int r;
3389 
3390 	r = amdgpu_ib_ring_tests(adev);
3391 	if (r)
3392 		DRM_ERROR("ib ring test failed (%d).\n", r);
3393 }
3394 
amdgpu_device_delay_enable_gfx_off(struct work_struct * work)3395 static void amdgpu_device_delay_enable_gfx_off(struct work_struct *work)
3396 {
3397 	struct amdgpu_device *adev =
3398 		container_of(work, struct amdgpu_device, gfx.gfx_off_delay_work.work);
3399 
3400 	WARN_ON_ONCE(adev->gfx.gfx_off_state);
3401 	WARN_ON_ONCE(adev->gfx.gfx_off_req_count);
3402 
3403 	if (!amdgpu_dpm_set_powergating_by_smu(adev, AMD_IP_BLOCK_TYPE_GFX, true))
3404 		adev->gfx.gfx_off_state = true;
3405 }
3406 
3407 /**
3408  * amdgpu_device_ip_suspend_phase1 - run suspend for hardware IPs (phase 1)
3409  *
3410  * @adev: amdgpu_device pointer
3411  *
3412  * Main suspend function for hardware IPs.  The list of all the hardware
3413  * IPs that make up the asic is walked, clockgating is disabled and the
3414  * suspend callbacks are run.  suspend puts the hardware and software state
3415  * in each IP into a state suitable for suspend.
3416  * Returns 0 on success, negative error code on failure.
3417  */
amdgpu_device_ip_suspend_phase1(struct amdgpu_device * adev)3418 static int amdgpu_device_ip_suspend_phase1(struct amdgpu_device *adev)
3419 {
3420 	int i, r;
3421 
3422 	amdgpu_device_set_pg_state(adev, AMD_PG_STATE_UNGATE);
3423 	amdgpu_device_set_cg_state(adev, AMD_CG_STATE_UNGATE);
3424 
3425 	/*
3426 	 * Per PMFW team's suggestion, driver needs to handle gfxoff
3427 	 * and df cstate features disablement for gpu reset(e.g. Mode1Reset)
3428 	 * scenario. Add the missing df cstate disablement here.
3429 	 */
3430 	if (amdgpu_dpm_set_df_cstate(adev, DF_CSTATE_DISALLOW))
3431 		dev_warn(adev->dev, "Failed to disallow df cstate");
3432 
3433 	for (i = adev->num_ip_blocks - 1; i >= 0; i--) {
3434 		if (!adev->ip_blocks[i].status.valid)
3435 			continue;
3436 
3437 		/* displays are handled separately */
3438 		if (adev->ip_blocks[i].version->type != AMD_IP_BLOCK_TYPE_DCE)
3439 			continue;
3440 
3441 		/* XXX handle errors */
3442 		r = adev->ip_blocks[i].version->funcs->suspend(adev);
3443 		/* XXX handle errors */
3444 		if (r) {
3445 			DRM_ERROR("suspend of IP block <%s> failed %d\n",
3446 				  adev->ip_blocks[i].version->funcs->name, r);
3447 			return r;
3448 		}
3449 
3450 		adev->ip_blocks[i].status.hw = false;
3451 	}
3452 
3453 	return 0;
3454 }
3455 
/**
 * amdgpu_device_ip_suspend_phase2 - run suspend for hardware IPs (phase 2)
 *
 * @adev: amdgpu_device pointer
 *
 * Second suspend pass for hardware IPs.  The list of IP blocks is walked
 * backwards and the suspend callback is run for every valid block except
 * the displays (handled in phase 1) and the blocks excluded by the RAS,
 * pending-XGMI-reset, S0ix and reset special cases below.  A failing
 * suspend callback is logged and the walk continues; only a failure to set
 * the MP1 state after suspending the SMC (bare metal only) aborts it.
 * Returns 0 on success, negative error code on failure.
 */
static int amdgpu_device_ip_suspend_phase2(struct amdgpu_device *adev)
{
	int i, r;

	if (adev->in_s0ix)
		amdgpu_dpm_gfx_state_change(adev, sGpuChangeState_D3Entry);

	for (i = adev->num_ip_blocks - 1; i >= 0; i--) {
		if (!adev->ip_blocks[i].status.valid)
			continue;
		/* displays are handled in phase1 */
		if (adev->ip_blocks[i].version->type == AMD_IP_BLOCK_TYPE_DCE)
			continue;
		/* PSP lost connection when err_event_athub occurs */
		if (amdgpu_ras_intr_triggered() &&
		    adev->ip_blocks[i].version->type == AMD_IP_BLOCK_TYPE_PSP) {
			/* mark it down without calling its suspend hook */
			adev->ip_blocks[i].status.hw = false;
			continue;
		}

		/* skip unnecessary suspend if we do not initialize them yet */
		if (adev->gmc.xgmi.pending_reset &&
		    !(adev->ip_blocks[i].version->type == AMD_IP_BLOCK_TYPE_GMC ||
		      adev->ip_blocks[i].version->type == AMD_IP_BLOCK_TYPE_SMC ||
		      adev->ip_blocks[i].version->type == AMD_IP_BLOCK_TYPE_COMMON ||
		      adev->ip_blocks[i].version->type == AMD_IP_BLOCK_TYPE_IH)) {
			adev->ip_blocks[i].status.hw = false;
			continue;
		}

		/* skip suspend of gfx/mes and psp for S0ix
		 * gfx is in gfxoff state, so on resume it will exit gfxoff just
		 * like at runtime. PSP is also part of the always on hardware
		 * so no need to suspend it.
		 * Note that status.hw is left untouched for these blocks.
		 */
		if (adev->in_s0ix &&
		    (adev->ip_blocks[i].version->type == AMD_IP_BLOCK_TYPE_PSP ||
		     adev->ip_blocks[i].version->type == AMD_IP_BLOCK_TYPE_GFX ||
		     adev->ip_blocks[i].version->type == AMD_IP_BLOCK_TYPE_MES))
			continue;

		/* SDMA 5.x+ is part of GFX power domain so it's covered by GFXOFF */
		if (adev->in_s0ix &&
		    (amdgpu_ip_version(adev, SDMA0_HWIP, 0) >=
		     IP_VERSION(5, 0, 0)) &&
		    (adev->ip_blocks[i].version->type ==
		     AMD_IP_BLOCK_TYPE_SDMA))
			continue;

		/* swPSP provides the IMU and RLC FW binaries to TOS once, during cold boot.
		 * These are in TMR, hence are expected to be reused by PSP-TOS to reload
		 * from this location and RLC Autoload automatically also gets loaded
		 * from here based on PMFW -> PSP message during re-init sequence.
		 * Therefore, the psp suspend & resume should be skipped to avoid destroying
		 * the TMR and reloading the FWs again for IMU enabled APU ASICs.
		 */
		if (amdgpu_in_reset(adev) &&
		    (adev->flags & AMD_IS_APU) && adev->gfx.imu.funcs &&
		    adev->ip_blocks[i].version->type == AMD_IP_BLOCK_TYPE_PSP)
			continue;

		r = adev->ip_blocks[i].version->funcs->suspend(adev);
		/* XXX handle errors: a failure is logged and the block is still marked down */
		if (r) {
			DRM_ERROR("suspend of IP block <%s> failed %d\n",
				  adev->ip_blocks[i].version->funcs->name, r);
		}
		adev->ip_blocks[i].status.hw = false;
		/* handle putting the SMC in the appropriate state */
		if (!amdgpu_sriov_vf(adev)) {
			if (adev->ip_blocks[i].version->type == AMD_IP_BLOCK_TYPE_SMC) {
				r = amdgpu_dpm_set_mp1_state(adev, adev->mp1_state);
				if (r) {
					DRM_ERROR("SMC failed to set mp1 state %d, %d\n",
							adev->mp1_state, r);
					return r;
				}
			}
		}
	}

	return 0;
}
3551 
3552 /**
3553  * amdgpu_device_ip_suspend - run suspend for hardware IPs
3554  *
3555  * @adev: amdgpu_device pointer
3556  *
3557  * Main suspend function for hardware IPs.  The list of all the hardware
3558  * IPs that make up the asic is walked, clockgating is disabled and the
3559  * suspend callbacks are run.  suspend puts the hardware and software state
3560  * in each IP into a state suitable for suspend.
3561  * Returns 0 on success, negative error code on failure.
3562  */
amdgpu_device_ip_suspend(struct amdgpu_device * adev)3563 int amdgpu_device_ip_suspend(struct amdgpu_device *adev)
3564 {
3565 	int r;
3566 
3567 	if (amdgpu_sriov_vf(adev)) {
3568 		amdgpu_virt_fini_data_exchange(adev);
3569 		amdgpu_virt_request_full_gpu(adev, false);
3570 	}
3571 
3572 	amdgpu_ttm_set_buffer_funcs_status(adev, false);
3573 
3574 	r = amdgpu_device_ip_suspend_phase1(adev);
3575 	if (r)
3576 		return r;
3577 	r = amdgpu_device_ip_suspend_phase2(adev);
3578 
3579 	if (amdgpu_sriov_vf(adev))
3580 		amdgpu_virt_release_full_gpu(adev, false);
3581 
3582 	return r;
3583 }
3584 
amdgpu_device_ip_reinit_early_sriov(struct amdgpu_device * adev)3585 static int amdgpu_device_ip_reinit_early_sriov(struct amdgpu_device *adev)
3586 {
3587 	int i, r;
3588 
3589 	static enum amd_ip_block_type ip_order[] = {
3590 		AMD_IP_BLOCK_TYPE_COMMON,
3591 		AMD_IP_BLOCK_TYPE_GMC,
3592 		AMD_IP_BLOCK_TYPE_PSP,
3593 		AMD_IP_BLOCK_TYPE_IH,
3594 	};
3595 
3596 	for (i = 0; i < adev->num_ip_blocks; i++) {
3597 		int j;
3598 		struct amdgpu_ip_block *block;
3599 
3600 		block = &adev->ip_blocks[i];
3601 		block->status.hw = false;
3602 
3603 		for (j = 0; j < ARRAY_SIZE(ip_order); j++) {
3604 
3605 			if (block->version->type != ip_order[j] ||
3606 				!block->status.valid)
3607 				continue;
3608 
3609 			r = block->version->funcs->hw_init(adev);
3610 			DRM_INFO("RE-INIT-early: %s %s\n", block->version->funcs->name, r?"failed":"succeeded");
3611 			if (r)
3612 				return r;
3613 			block->status.hw = true;
3614 		}
3615 	}
3616 
3617 	return 0;
3618 }
3619 
amdgpu_device_ip_reinit_late_sriov(struct amdgpu_device * adev)3620 static int amdgpu_device_ip_reinit_late_sriov(struct amdgpu_device *adev)
3621 {
3622 	int i, r;
3623 
3624 	static enum amd_ip_block_type ip_order[] = {
3625 		AMD_IP_BLOCK_TYPE_SMC,
3626 		AMD_IP_BLOCK_TYPE_DCE,
3627 		AMD_IP_BLOCK_TYPE_GFX,
3628 		AMD_IP_BLOCK_TYPE_SDMA,
3629 		AMD_IP_BLOCK_TYPE_MES,
3630 		AMD_IP_BLOCK_TYPE_UVD,
3631 		AMD_IP_BLOCK_TYPE_VCE,
3632 		AMD_IP_BLOCK_TYPE_VCN,
3633 		AMD_IP_BLOCK_TYPE_JPEG
3634 	};
3635 
3636 	for (i = 0; i < ARRAY_SIZE(ip_order); i++) {
3637 		int j;
3638 		struct amdgpu_ip_block *block;
3639 
3640 		for (j = 0; j < adev->num_ip_blocks; j++) {
3641 			block = &adev->ip_blocks[j];
3642 
3643 			if (block->version->type != ip_order[i] ||
3644 				!block->status.valid ||
3645 				block->status.hw)
3646 				continue;
3647 
3648 			if (block->version->type == AMD_IP_BLOCK_TYPE_SMC)
3649 				r = block->version->funcs->resume(adev);
3650 			else
3651 				r = block->version->funcs->hw_init(adev);
3652 
3653 			DRM_INFO("RE-INIT-late: %s %s\n", block->version->funcs->name, r?"failed":"succeeded");
3654 			if (r)
3655 				return r;
3656 			block->status.hw = true;
3657 		}
3658 	}
3659 
3660 	return 0;
3661 }
3662 
3663 /**
3664  * amdgpu_device_ip_resume_phase1 - run resume for hardware IPs
3665  *
3666  * @adev: amdgpu_device pointer
3667  *
3668  * First resume function for hardware IPs.  The list of all the hardware
3669  * IPs that make up the asic is walked and the resume callbacks are run for
3670  * COMMON, GMC, and IH.  resume puts the hardware into a functional state
3671  * after a suspend and updates the software state as necessary.  This
3672  * function is also used for restoring the GPU after a GPU reset.
3673  * Returns 0 on success, negative error code on failure.
3674  */
amdgpu_device_ip_resume_phase1(struct amdgpu_device * adev)3675 static int amdgpu_device_ip_resume_phase1(struct amdgpu_device *adev)
3676 {
3677 	int i, r;
3678 
3679 	for (i = 0; i < adev->num_ip_blocks; i++) {
3680 		if (!adev->ip_blocks[i].status.valid || adev->ip_blocks[i].status.hw)
3681 			continue;
3682 		if (adev->ip_blocks[i].version->type == AMD_IP_BLOCK_TYPE_COMMON ||
3683 		    adev->ip_blocks[i].version->type == AMD_IP_BLOCK_TYPE_GMC ||
3684 		    adev->ip_blocks[i].version->type == AMD_IP_BLOCK_TYPE_IH ||
3685 		    (adev->ip_blocks[i].version->type == AMD_IP_BLOCK_TYPE_PSP && amdgpu_sriov_vf(adev))) {
3686 
3687 			r = adev->ip_blocks[i].version->funcs->resume(adev);
3688 			if (r) {
3689 				DRM_ERROR("resume of IP block <%s> failed %d\n",
3690 					  adev->ip_blocks[i].version->funcs->name, r);
3691 				return r;
3692 			}
3693 			adev->ip_blocks[i].status.hw = true;
3694 		}
3695 	}
3696 
3697 	return 0;
3698 }
3699 
3700 /**
3701  * amdgpu_device_ip_resume_phase2 - run resume for hardware IPs
3702  *
3703  * @adev: amdgpu_device pointer
3704  *
3705  * Second resume function for hardware IPs.  The list of all the hardware
3706  * IPs that make up the asic is walked and the resume callbacks are run for
3707  * all blocks except COMMON, GMC, and IH.  resume puts the hardware into a
3708  * functional state after a suspend and updates the software state as
3709  * necessary.  This function is also used for restoring the GPU after a GPU
3710  * reset.
3711  * Returns 0 on success, negative error code on failure.
3712  */
amdgpu_device_ip_resume_phase2(struct amdgpu_device * adev)3713 static int amdgpu_device_ip_resume_phase2(struct amdgpu_device *adev)
3714 {
3715 	int i, r;
3716 
3717 	for (i = 0; i < adev->num_ip_blocks; i++) {
3718 		if (!adev->ip_blocks[i].status.valid || adev->ip_blocks[i].status.hw)
3719 			continue;
3720 		if (adev->ip_blocks[i].version->type == AMD_IP_BLOCK_TYPE_COMMON ||
3721 		    adev->ip_blocks[i].version->type == AMD_IP_BLOCK_TYPE_GMC ||
3722 		    adev->ip_blocks[i].version->type == AMD_IP_BLOCK_TYPE_IH ||
3723 		    adev->ip_blocks[i].version->type == AMD_IP_BLOCK_TYPE_DCE ||
3724 		    adev->ip_blocks[i].version->type == AMD_IP_BLOCK_TYPE_PSP)
3725 			continue;
3726 		r = adev->ip_blocks[i].version->funcs->resume(adev);
3727 		if (r) {
3728 			DRM_ERROR("resume of IP block <%s> failed %d\n",
3729 				  adev->ip_blocks[i].version->funcs->name, r);
3730 			return r;
3731 		}
3732 		adev->ip_blocks[i].status.hw = true;
3733 	}
3734 
3735 	return 0;
3736 }
3737 
3738 /**
3739  * amdgpu_device_ip_resume_phase3 - run resume for hardware IPs
3740  *
3741  * @adev: amdgpu_device pointer
3742  *
3743  * Third resume function for hardware IPs.  The list of all the hardware
3744  * IPs that make up the asic is walked and the resume callbacks are run for
3745  * all DCE.  resume puts the hardware into a functional state after a suspend
3746  * and updates the software state as necessary.  This function is also used
3747  * for restoring the GPU after a GPU reset.
3748  *
3749  * Returns 0 on success, negative error code on failure.
3750  */
amdgpu_device_ip_resume_phase3(struct amdgpu_device * adev)3751 static int amdgpu_device_ip_resume_phase3(struct amdgpu_device *adev)
3752 {
3753 	int i, r;
3754 
3755 	for (i = 0; i < adev->num_ip_blocks; i++) {
3756 		if (!adev->ip_blocks[i].status.valid || adev->ip_blocks[i].status.hw)
3757 			continue;
3758 		if (adev->ip_blocks[i].version->type == AMD_IP_BLOCK_TYPE_DCE) {
3759 			r = adev->ip_blocks[i].version->funcs->resume(adev);
3760 			if (r) {
3761 				DRM_ERROR("resume of IP block <%s> failed %d\n",
3762 					  adev->ip_blocks[i].version->funcs->name, r);
3763 				return r;
3764 			}
3765 			adev->ip_blocks[i].status.hw = true;
3766 		}
3767 	}
3768 
3769 	return 0;
3770 }
3771 
3772 /**
3773  * amdgpu_device_ip_resume - run resume for hardware IPs
3774  *
3775  * @adev: amdgpu_device pointer
3776  *
3777  * Main resume function for hardware IPs.  The hardware IPs
3778  * are split into two resume functions because they are
3779  * also used in recovering from a GPU reset and some additional
3780  * steps need to be take between them.  In this case (S3/S4) they are
3781  * run sequentially.
3782  * Returns 0 on success, negative error code on failure.
3783  */
amdgpu_device_ip_resume(struct amdgpu_device * adev)3784 static int amdgpu_device_ip_resume(struct amdgpu_device *adev)
3785 {
3786 	int r;
3787 
3788 	r = amdgpu_device_ip_resume_phase1(adev);
3789 	if (r)
3790 		return r;
3791 
3792 	r = amdgpu_device_fw_loading(adev);
3793 	if (r)
3794 		return r;
3795 
3796 	r = amdgpu_device_ip_resume_phase2(adev);
3797 
3798 	if (adev->mman.buffer_funcs_ring->sched.ready)
3799 		amdgpu_ttm_set_buffer_funcs_status(adev, true);
3800 
3801 	if (r)
3802 		return r;
3803 
3804 	amdgpu_fence_driver_hw_init(adev);
3805 
3806 	r = amdgpu_device_ip_resume_phase3(adev);
3807 
3808 	return r;
3809 }
3810 
3811 /**
3812  * amdgpu_device_detect_sriov_bios - determine if the board supports SR-IOV
3813  *
3814  * @adev: amdgpu_device pointer
3815  *
3816  * Query the VBIOS data tables to determine if the board supports SR-IOV.
3817  */
amdgpu_device_detect_sriov_bios(struct amdgpu_device * adev)3818 static void amdgpu_device_detect_sriov_bios(struct amdgpu_device *adev)
3819 {
3820 	if (amdgpu_sriov_vf(adev)) {
3821 		if (adev->is_atom_fw) {
3822 			if (amdgpu_atomfirmware_gpu_virtualization_supported(adev))
3823 				adev->virt.caps |= AMDGPU_SRIOV_CAPS_SRIOV_VBIOS;
3824 		} else {
3825 			if (amdgpu_atombios_has_gpu_virtualization_table(adev))
3826 				adev->virt.caps |= AMDGPU_SRIOV_CAPS_SRIOV_VBIOS;
3827 		}
3828 
3829 		if (!(adev->virt.caps & AMDGPU_SRIOV_CAPS_SRIOV_VBIOS))
3830 			amdgpu_vf_error_put(adev, AMDGIM_ERROR_VF_NO_VBIOS, 0, 0);
3831 	}
3832 }
3833 
/**
 * amdgpu_device_asic_has_dc_support - determine if DC supports the asic
 *
 * @asic_type: AMD asic type
 *
 * Check if there is DC (new modesetting infrastructure) support for an asic.
 * The answer depends on the asic type, on the build configuration
 * (CONFIG_DRM_AMDGPU_SI, CONFIG_DRM_AMD_DC, CONFIG_DRM_AMD_DC_SI) and on the
 * value of amdgpu_dc.
 *
 * Returns true if DC has support, false if not.
 */
bool amdgpu_device_asic_has_dc_support(enum amd_asic_type asic_type)
{
	switch (asic_type) {
#ifdef CONFIG_DRM_AMDGPU_SI
	case CHIP_HAINAN:
#endif
	case CHIP_TOPAZ:
		/* chips with no display hardware */
		return false;
#if defined(CONFIG_DRM_AMD_DC)
	case CHIP_TAHITI:
	case CHIP_PITCAIRN:
	case CHIP_VERDE:
	case CHIP_OLAND:
		/*
		 * We have systems in the wild with these ASICs that require
		 * LVDS and VGA support which is not supported with DC.
		 *
		 * Fallback to the non-DC driver here by default so as not to
		 * cause regressions.
		 */
#if defined(CONFIG_DRM_AMD_DC_SI)
		/* DC is used only when amdgpu_dc is positive. */
		return amdgpu_dc > 0;
#else
		/* Without CONFIG_DRM_AMD_DC_SI these asics never use DC. */
		return false;
#endif
	case CHIP_BONAIRE:
	case CHIP_KAVERI:
	case CHIP_KABINI:
	case CHIP_MULLINS:
		/*
		 * We have systems in the wild with these ASICs that require
		 * VGA support which is not supported with DC.
		 *
		 * Fallback to the non-DC driver here by default so as not to
		 * cause regressions.
		 */
		return amdgpu_dc > 0;
	default:
		/* Every other asic uses DC unless amdgpu_dc is exactly 0. */
		return amdgpu_dc != 0;
#else
	default:
		/* Built without CONFIG_DRM_AMD_DC: DC is never available. */
		if (amdgpu_dc > 0)
			DRM_INFO_ONCE("Display Core has been requested via kernel parameter but isn't supported by ASIC, ignoring\n");
		return false;
#endif
	}
}
3890 
3891 /**
3892  * amdgpu_device_has_dc_support - check if dc is supported
3893  *
3894  * @adev: amdgpu_device pointer
3895  *
3896  * Returns true for supported, false for not supported
3897  */
amdgpu_device_has_dc_support(struct amdgpu_device * adev)3898 bool amdgpu_device_has_dc_support(struct amdgpu_device *adev)
3899 {
3900 	if (adev->enable_virtual_display ||
3901 	    (adev->harvest_ip_mask & AMD_HARVEST_IP_DMU_MASK))
3902 		return false;
3903 
3904 	return amdgpu_device_asic_has_dc_support(adev->asic_type);
3905 }
3906 
amdgpu_device_xgmi_reset_func(struct work_struct * __work)3907 static void amdgpu_device_xgmi_reset_func(struct work_struct *__work)
3908 {
3909 	struct amdgpu_device *adev =
3910 		container_of(__work, struct amdgpu_device, xgmi_reset_work);
3911 	struct amdgpu_hive_info *hive = amdgpu_get_xgmi_hive(adev);
3912 
3913 	/* It's a bug to not have a hive within this function */
3914 	if (WARN_ON(!hive))
3915 		return;
3916 
3917 	/*
3918 	 * Use task barrier to synchronize all xgmi reset works across the
3919 	 * hive. task_barrier_enter and task_barrier_exit will block
3920 	 * until all the threads running the xgmi reset works reach
3921 	 * those points. task_barrier_full will do both blocks.
3922 	 */
3923 	if (amdgpu_asic_reset_method(adev) == AMD_RESET_METHOD_BACO) {
3924 
3925 		task_barrier_enter(&hive->tb);
3926 		adev->asic_reset_res = amdgpu_device_baco_enter(adev_to_drm(adev));
3927 
3928 		if (adev->asic_reset_res)
3929 			goto fail;
3930 
3931 		task_barrier_exit(&hive->tb);
3932 		adev->asic_reset_res = amdgpu_device_baco_exit(adev_to_drm(adev));
3933 
3934 		if (adev->asic_reset_res)
3935 			goto fail;
3936 
3937 		amdgpu_ras_reset_error_count(adev, AMDGPU_RAS_BLOCK__MMHUB);
3938 	} else {
3939 
3940 		task_barrier_full(&hive->tb);
3941 		adev->asic_reset_res =  amdgpu_asic_reset(adev);
3942 	}
3943 
3944 fail:
3945 	if (adev->asic_reset_res)
3946 		DRM_WARN("ASIC reset failed with error, %d for drm dev, %s",
3947 			 adev->asic_reset_res, adev_to_drm(adev)->unique);
3948 	amdgpu_put_xgmi_hive(hive);
3949 }
3950 
/**
 * amdgpu_device_get_job_timeout_settings - set the per-engine job timeouts
 *
 * @adev: amdgpu_device pointer
 *
 * Fills in adev->gfx_timeout, adev->compute_timeout, adev->sdma_timeout and
 * adev->video_timeout, all expressed in jiffies.  The code that overrides
 * these defaults from the comma separated amdgpu_lockup_timeout string is
 * wrapped in "#ifdef notyet"; when that macro is not defined only the
 * defaults are applied and the function always returns 0.
 *
 * Returns 0 on success, or the kstrtol() error code when the parsing code is
 * compiled in and an entry cannot be converted.
 */
static int amdgpu_device_get_job_timeout_settings(struct amdgpu_device *adev)
{
	char *input = amdgpu_lockup_timeout;
	char *timeout_setting = NULL;
	int index = 0;
	long timeout;
	int ret = 0;

	/*
	 * Defaults, in milliseconds: 10000 for gfx, sdma and video jobs.
	 * Compute jobs get 60000, except on an SR-IOV VF for which
	 * amdgpu_sriov_is_pp_one_vf() is false, where they get 10000.
	 */
	adev->gfx_timeout = msecs_to_jiffies(10000);
	adev->sdma_timeout = adev->video_timeout = adev->gfx_timeout;
	if (amdgpu_sriov_vf(adev))
		adev->compute_timeout = amdgpu_sriov_is_pp_one_vf(adev) ?
					msecs_to_jiffies(60000) : msecs_to_jiffies(10000);
	else
		adev->compute_timeout =  msecs_to_jiffies(60000);

#ifdef notyet
	/*
	 * Entries are taken in the order gfx, compute, sdma, video; any
	 * further entries are ignored.  strsep() splits the parameter string
	 * in place.
	 */
	if (strnlen(input, AMDGPU_MAX_TIMEOUT_PARAM_LENGTH)) {
		while ((timeout_setting = strsep(&input, ",")) &&
				strnlen(timeout_setting, AMDGPU_MAX_TIMEOUT_PARAM_LENGTH)) {
			ret = kstrtol(timeout_setting, 0, &timeout);
			if (ret)
				return ret;

			if (timeout == 0) {
				/* 0 keeps the default for this slot. */
				index++;
				continue;
			} else if (timeout < 0) {
				/* A negative value disables the timeout. */
				timeout = MAX_SCHEDULE_TIMEOUT;
				dev_warn(adev->dev, "lockup timeout disabled");
				add_taint(TAINT_SOFTLOCKUP, LOCKDEP_STILL_OK);
			} else {
				/* A positive value is a timeout in milliseconds. */
				timeout = msecs_to_jiffies(timeout);
			}

			switch (index++) {
			case 0:
				adev->gfx_timeout = timeout;
				break;
			case 1:
				adev->compute_timeout = timeout;
				break;
			case 2:
				adev->sdma_timeout = timeout;
				break;
			case 3:
				adev->video_timeout = timeout;
				break;
			default:
				break;
			}
		}
		/*
		 * There is only one value specified and
		 * it should apply to all non-compute jobs.
		 * Under SR-IOV or passthrough it applies to compute jobs too.
		 */
		if (index == 1) {
			adev->sdma_timeout = adev->video_timeout = adev->gfx_timeout;
			if (amdgpu_sriov_vf(adev) || amdgpu_passthrough(adev))
				adev->compute_timeout = adev->gfx_timeout;
		}
	}
#endif

	return ret;
}
4023 
4024 /**
4025  * amdgpu_device_check_iommu_direct_map - check if RAM direct mapped to GPU
4026  *
4027  * @adev: amdgpu_device pointer
4028  *
4029  * RAM direct mapped to GPU if IOMMU is not enabled or is pass through mode
4030  */
amdgpu_device_check_iommu_direct_map(struct amdgpu_device * adev)4031 static void amdgpu_device_check_iommu_direct_map(struct amdgpu_device *adev)
4032 {
4033 #ifdef notyet
4034 	struct iommu_domain *domain;
4035 
4036 	domain = iommu_get_domain_for_dev(adev->dev);
4037 	if (!domain || domain->type == IOMMU_DOMAIN_IDENTITY)
4038 #endif
4039 		adev->ram_is_direct_mapped = true;
4040 }
4041 
4042 #if defined(CONFIG_HSA_AMD_P2P)
4043 /**
4044  * amdgpu_device_check_iommu_remap - Check if DMA remapping is enabled.
4045  *
4046  * @adev: amdgpu_device pointer
4047  *
4048  * return if IOMMU remapping bar address
4049  */
amdgpu_device_check_iommu_remap(struct amdgpu_device * adev)4050 static bool amdgpu_device_check_iommu_remap(struct amdgpu_device *adev)
4051 {
4052 	struct iommu_domain *domain;
4053 
4054 	domain = iommu_get_domain_for_dev(adev->dev);
4055 	if (domain && (domain->type == IOMMU_DOMAIN_DMA ||
4056 		domain->type ==	IOMMU_DOMAIN_DMA_FQ))
4057 		return true;
4058 
4059 	return false;
4060 }
4061 #endif
4062 
/*
 * NULL-terminated list of device attributes.  amdgpu_device_init() registers
 * it with sysfs_create_files() and amdgpu_device_fini_hw() removes it with
 * sysfs_remove_files().
 */
static const struct attribute *amdgpu_dev_attributes[] = {
	&dev_attr_pcie_replay_count.attr,
	NULL
};
4067 
amdgpu_device_set_mcbp(struct amdgpu_device * adev)4068 static void amdgpu_device_set_mcbp(struct amdgpu_device *adev)
4069 {
4070 	if (amdgpu_mcbp == 1)
4071 		adev->gfx.mcbp = true;
4072 	else if (amdgpu_mcbp == 0)
4073 		adev->gfx.mcbp = false;
4074 
4075 	if (amdgpu_sriov_vf(adev))
4076 		adev->gfx.mcbp = true;
4077 
4078 	if (adev->gfx.mcbp)
4079 		DRM_INFO("MCBP is enabled\n");
4080 }
4081 
4082 /**
4083  * amdgpu_device_init - initialize the driver
4084  *
4085  * @adev: amdgpu_device pointer
4086  * @flags: driver flags
4087  *
4088  * Initializes the driver info and hw (all asics).
4089  * Returns 0 for success or an error on failure.
4090  * Called at driver startup.
4091  */
amdgpu_device_init(struct amdgpu_device * adev,uint32_t flags)4092 int amdgpu_device_init(struct amdgpu_device *adev,
4093 		       uint32_t flags)
4094 {
4095 	struct drm_device *ddev = adev_to_drm(adev);
4096 	struct pci_dev *pdev = adev->pdev;
4097 	int r, i;
4098 	bool px = false;
4099 	u32 max_MBps;
4100 	int tmp;
4101 
4102 	adev->shutdown = false;
4103 	adev->flags = flags;
4104 
4105 	if (amdgpu_force_asic_type >= 0 && amdgpu_force_asic_type < CHIP_LAST)
4106 		adev->asic_type = amdgpu_force_asic_type;
4107 	else
4108 		adev->asic_type = flags & AMD_ASIC_MASK;
4109 
4110 	adev->usec_timeout = AMDGPU_MAX_USEC_TIMEOUT;
4111 	if (amdgpu_emu_mode == 1)
4112 		adev->usec_timeout *= 10;
4113 	adev->gmc.gart_size = 512 * 1024 * 1024;
4114 	adev->accel_working = false;
4115 	adev->num_rings = 0;
4116 	RCU_INIT_POINTER(adev->gang_submit, dma_fence_get_stub());
4117 	adev->mman.buffer_funcs = NULL;
4118 	adev->mman.buffer_funcs_ring = NULL;
4119 	adev->vm_manager.vm_pte_funcs = NULL;
4120 	adev->vm_manager.vm_pte_num_scheds = 0;
4121 	adev->gmc.gmc_funcs = NULL;
4122 	adev->harvest_ip_mask = 0x0;
4123 	adev->fence_context = dma_fence_context_alloc(AMDGPU_MAX_RINGS);
4124 	bitmap_zero(adev->gfx.pipe_reserve_bitmap, AMDGPU_MAX_COMPUTE_QUEUES);
4125 
4126 	adev->smc_rreg = &amdgpu_invalid_rreg;
4127 	adev->smc_wreg = &amdgpu_invalid_wreg;
4128 	adev->pcie_rreg = &amdgpu_invalid_rreg;
4129 	adev->pcie_wreg = &amdgpu_invalid_wreg;
4130 	adev->pcie_rreg_ext = &amdgpu_invalid_rreg_ext;
4131 	adev->pcie_wreg_ext = &amdgpu_invalid_wreg_ext;
4132 	adev->pciep_rreg = &amdgpu_invalid_rreg;
4133 	adev->pciep_wreg = &amdgpu_invalid_wreg;
4134 	adev->pcie_rreg64 = &amdgpu_invalid_rreg64;
4135 	adev->pcie_wreg64 = &amdgpu_invalid_wreg64;
4136 	adev->pcie_rreg64_ext = &amdgpu_invalid_rreg64_ext;
4137 	adev->pcie_wreg64_ext = &amdgpu_invalid_wreg64_ext;
4138 	adev->uvd_ctx_rreg = &amdgpu_invalid_rreg;
4139 	adev->uvd_ctx_wreg = &amdgpu_invalid_wreg;
4140 	adev->didt_rreg = &amdgpu_invalid_rreg;
4141 	adev->didt_wreg = &amdgpu_invalid_wreg;
4142 	adev->gc_cac_rreg = &amdgpu_invalid_rreg;
4143 	adev->gc_cac_wreg = &amdgpu_invalid_wreg;
4144 	adev->audio_endpt_rreg = &amdgpu_block_invalid_rreg;
4145 	adev->audio_endpt_wreg = &amdgpu_block_invalid_wreg;
4146 
4147 	DRM_INFO("initializing kernel modesetting (%s 0x%04X:0x%04X 0x%04X:0x%04X 0x%02X).\n",
4148 		 amdgpu_asic_name[adev->asic_type], pdev->vendor, pdev->device,
4149 		 pdev->subsystem_vendor, pdev->subsystem_device, pdev->revision);
4150 
4151 	/* mutex initialization are all done here so we
4152 	 * can recall function without having locking issues
4153 	 */
4154 	rw_init(&adev->firmware.mutex, "agfw");
4155 	rw_init(&adev->pm.mutex, "agpm");
4156 	rw_init(&adev->gfx.gpu_clock_mutex, "gfxclk");
4157 	rw_init(&adev->srbm_mutex, "srbm");
4158 	rw_init(&adev->gfx.pipe_reserve_mutex, "pipers");
4159 	rw_init(&adev->gfx.gfx_off_mutex, "gfxoff");
4160 	rw_init(&adev->gfx.partition_mutex, "gfxpar");
4161 	rw_init(&adev->grbm_idx_mutex, "grbmidx");
4162 	rw_init(&adev->mn_lock, "agpumn");
4163 	rw_init(&adev->virt.vf_errors.lock, "vferr");
4164 	rw_init(&adev->virt.rlcg_reg_lock, "vrlcg");
4165 	hash_init(adev->mn_hash);
4166 	rw_init(&adev->psp.mutex, "agpsp");
4167 	rw_init(&adev->notifier_lock, "agnf");
4168 	rw_init(&adev->pm.stable_pstate_ctx_lock, "agps");
4169 	rw_init(&adev->benchmark_mutex, "agbm");
4170 	rw_init(&adev->gfx.reset_sem_mutex, "agrsem");
4171 	/* Initialize the mutex for cleaner shader isolation between GFX and compute processes */
4172 	rw_init(&adev->enforce_isolation_mutex, "agim");
4173 	rw_init(&adev->gfx.kfd_sch_mutex, "kfdsch");
4174 
4175 	amdgpu_device_init_apu_flags(adev);
4176 
4177 	r = amdgpu_device_check_arguments(adev);
4178 	if (r)
4179 		return r;
4180 
4181 	mtx_init(&adev->mmio_idx_lock, IPL_TTY);
4182 	mtx_init(&adev->smc_idx_lock, IPL_TTY);
4183 	mtx_init(&adev->pcie_idx_lock, IPL_TTY);
4184 	mtx_init(&adev->uvd_ctx_idx_lock, IPL_TTY);
4185 	mtx_init(&adev->didt_idx_lock, IPL_TTY);
4186 	mtx_init(&adev->gc_cac_idx_lock, IPL_TTY);
4187 	mtx_init(&adev->se_cac_idx_lock, IPL_TTY);
4188 	mtx_init(&adev->audio_endpt_idx_lock, IPL_TTY);
4189 	mtx_init(&adev->mm_stats.lock, IPL_NONE);
4190 	mtx_init(&adev->wb.lock, IPL_TTY);
4191 
4192 	INIT_LIST_HEAD(&adev->reset_list);
4193 
4194 	INIT_LIST_HEAD(&adev->ras_list);
4195 
4196 	INIT_LIST_HEAD(&adev->pm.od_kobj_list);
4197 
4198 	INIT_DELAYED_WORK(&adev->delayed_init_work,
4199 			  amdgpu_device_delayed_init_work_handler);
4200 	INIT_DELAYED_WORK(&adev->gfx.gfx_off_delay_work,
4201 			  amdgpu_device_delay_enable_gfx_off);
4202 	/*
4203 	 * Initialize the enforce_isolation work structures for each XCP
4204 	 * partition.  This work handler is responsible for enforcing shader
4205 	 * isolation on AMD GPUs.  It counts the number of emitted fences for
4206 	 * each GFX and compute ring.  If there are any fences, it schedules
4207 	 * the `enforce_isolation_work` to be run after a delay.  If there are
4208 	 * no fences, it signals the Kernel Fusion Driver (KFD) to resume the
4209 	 * runqueue.
4210 	 */
4211 	for (i = 0; i < MAX_XCP; i++) {
4212 		INIT_DELAYED_WORK(&adev->gfx.enforce_isolation[i].work,
4213 				  amdgpu_gfx_enforce_isolation_handler);
4214 		adev->gfx.enforce_isolation[i].adev = adev;
4215 		adev->gfx.enforce_isolation[i].xcp_id = i;
4216 	}
4217 
4218 	INIT_WORK(&adev->xgmi_reset_work, amdgpu_device_xgmi_reset_func);
4219 
4220 	adev->gfx.gfx_off_req_count = 1;
4221 	adev->gfx.gfx_off_residency = 0;
4222 	adev->gfx.gfx_off_entrycount = 0;
4223 	adev->pm.ac_power = power_supply_is_system_supplied() > 0;
4224 
4225 	atomic_set(&adev->throttling_logging_enabled, 1);
4226 	/*
4227 	 * If throttling continues, logging will be performed every minute
4228 	 * to avoid log flooding. "-1" is subtracted since the thermal
4229 	 * throttling interrupt comes every second. Thus, the total logging
4230 	 * interval is 59 seconds(retelimited printk interval) + 1(waiting
4231 	 * for throttling interrupt) = 60 seconds.
4232 	 */
4233 	ratelimit_state_init(&adev->throttling_logging_rs, (60 - 1) * HZ, 1);
4234 	ratelimit_set_flags(&adev->throttling_logging_rs, RATELIMIT_MSG_ON_RELEASE);
4235 
4236 #ifdef __linux__
4237 	/* Registers mapping */
4238 	/* TODO: block userspace mapping of io register */
4239 	if (adev->asic_type >= CHIP_BONAIRE) {
4240 		adev->rmmio_base = pci_resource_start(adev->pdev, 5);
4241 		adev->rmmio_size = pci_resource_len(adev->pdev, 5);
4242 	} else {
4243 		adev->rmmio_base = pci_resource_start(adev->pdev, 2);
4244 		adev->rmmio_size = pci_resource_len(adev->pdev, 2);
4245 	}
4246 #endif
4247 
4248 	for (i = 0; i < AMD_IP_BLOCK_TYPE_NUM; i++)
4249 		atomic_set(&adev->pm.pwr_state[i], POWER_STATE_UNKNOWN);
4250 
4251 #ifdef __linux__
4252 	adev->rmmio = ioremap(adev->rmmio_base, adev->rmmio_size);
4253 	if (!adev->rmmio)
4254 		return -ENOMEM;
4255 #endif
4256 	DRM_INFO("register mmio base: 0x%08X\n", (uint32_t)adev->rmmio_base);
4257 	DRM_INFO("register mmio size: %u\n", (unsigned int)adev->rmmio_size);
4258 
4259 	/*
4260 	 * Reset domain needs to be present early, before XGMI hive discovered
4261 	 * (if any) and intitialized to use reset sem and in_gpu reset flag
4262 	 * early on during init and before calling to RREG32.
4263 	 */
4264 	adev->reset_domain = amdgpu_reset_create_reset_domain(SINGLE_DEVICE, "amdgpu-reset-dev");
4265 	if (!adev->reset_domain)
4266 		return -ENOMEM;
4267 
4268 	/* detect hw virtualization here */
4269 	amdgpu_detect_virtualization(adev);
4270 
4271 	amdgpu_device_get_pcie_info(adev);
4272 
4273 	r = amdgpu_device_get_job_timeout_settings(adev);
4274 	if (r) {
4275 		dev_err(adev->dev, "invalid lockup_timeout parameter syntax\n");
4276 		return r;
4277 	}
4278 
4279 	amdgpu_device_set_mcbp(adev);
4280 
4281 	/* early init functions */
4282 	r = amdgpu_device_ip_early_init(adev);
4283 	if (r)
4284 		return r;
4285 
4286 	/* Get rid of things like offb */
4287 	r = drm_aperture_remove_conflicting_pci_framebuffers(adev->pdev, &amdgpu_kms_driver);
4288 	if (r)
4289 		return r;
4290 
4291 	/* Enable TMZ based on IP_VERSION */
4292 	amdgpu_gmc_tmz_set(adev);
4293 
4294 	if (amdgpu_sriov_vf(adev) &&
4295 	    amdgpu_ip_version(adev, GC_HWIP, 0) >= IP_VERSION(10, 3, 0))
4296 		/* VF MMIO access (except mailbox range) from CPU
4297 		 * will be blocked during sriov runtime
4298 		 */
4299 		adev->virt.caps |= AMDGPU_VF_MMIO_ACCESS_PROTECT;
4300 
4301 	amdgpu_gmc_noretry_set(adev);
4302 	/* Need to get xgmi info early to decide the reset behavior*/
4303 	if (adev->gmc.xgmi.supported) {
4304 		r = adev->gfxhub.funcs->get_xgmi_info(adev);
4305 		if (r)
4306 			return r;
4307 	}
4308 
4309 	/* enable PCIE atomic ops */
4310 #ifdef notyet
4311 	if (amdgpu_sriov_vf(adev)) {
4312 		if (adev->virt.fw_reserve.p_pf2vf)
4313 			adev->have_atomics_support = ((struct amd_sriov_msg_pf2vf_info *)
4314 						      adev->virt.fw_reserve.p_pf2vf)->pcie_atomic_ops_support_flags ==
4315 				(PCI_EXP_DEVCAP2_ATOMIC_COMP32 | PCI_EXP_DEVCAP2_ATOMIC_COMP64);
4316 	/* APUs w/ gfx9 onwards doesn't reply on PCIe atomics, rather it is a
4317 	 * internal path natively support atomics, set have_atomics_support to true.
4318 	 */
4319 	} else if ((adev->flags & AMD_IS_APU) &&
4320 		   (amdgpu_ip_version(adev, GC_HWIP, 0) >
4321 		    IP_VERSION(9, 0, 0))) {
4322 		adev->have_atomics_support = true;
4323 	} else {
4324 		adev->have_atomics_support =
4325 			!pci_enable_atomic_ops_to_root(adev->pdev,
4326 					  PCI_EXP_DEVCAP2_ATOMIC_COMP32 |
4327 					  PCI_EXP_DEVCAP2_ATOMIC_COMP64);
4328 	}
4329 
4330 	if (!adev->have_atomics_support)
4331 		dev_info(adev->dev, "PCIE atomic ops is not supported\n");
4332 #else
4333 	/* APUs w/ gfx9 onwards doesn't reply on PCIe atomics, rather it is a
4334 	 * internal path natively support atomics, set have_atomics_support to true.
4335 	 */
4336 	if ((adev->flags & AMD_IS_APU) &&
4337 		(adev->ip_versions[GC_HWIP][0] > IP_VERSION(9, 0, 0)))
4338 		adev->have_atomics_support = true;
4339 	else
4340 		adev->have_atomics_support = false;
4341 #endif
4342 
4343 	/* doorbell bar mapping and doorbell index init*/
4344 	amdgpu_doorbell_init(adev);
4345 
4346 	if (amdgpu_emu_mode == 1) {
4347 		/* post the asic on emulation mode */
4348 		emu_soc_asic_init(adev);
4349 		goto fence_driver_init;
4350 	}
4351 
4352 	amdgpu_reset_init(adev);
4353 
4354 	/* detect if we are with an SRIOV vbios */
4355 	if (adev->bios)
4356 		amdgpu_device_detect_sriov_bios(adev);
4357 
4358 	/* check if we need to reset the asic
4359 	 *  E.g., driver was not cleanly unloaded previously, etc.
4360 	 */
4361 	if (!amdgpu_sriov_vf(adev) && amdgpu_asic_need_reset_on_init(adev)) {
4362 		if (adev->gmc.xgmi.num_physical_nodes) {
4363 			dev_info(adev->dev, "Pending hive reset.\n");
4364 			adev->gmc.xgmi.pending_reset = true;
4365 			/* Only need to init necessary block for SMU to handle the reset */
4366 			for (i = 0; i < adev->num_ip_blocks; i++) {
4367 				if (!adev->ip_blocks[i].status.valid)
4368 					continue;
4369 				if (!(adev->ip_blocks[i].version->type == AMD_IP_BLOCK_TYPE_GMC ||
4370 				      adev->ip_blocks[i].version->type == AMD_IP_BLOCK_TYPE_COMMON ||
4371 				      adev->ip_blocks[i].version->type == AMD_IP_BLOCK_TYPE_IH ||
4372 				      adev->ip_blocks[i].version->type == AMD_IP_BLOCK_TYPE_SMC)) {
4373 					DRM_DEBUG("IP %s disabled for hw_init.\n",
4374 						adev->ip_blocks[i].version->funcs->name);
4375 					adev->ip_blocks[i].status.hw = true;
4376 				}
4377 			}
4378 		} else if (amdgpu_ip_version(adev, MP1_HWIP, 0) == IP_VERSION(13, 0, 10) &&
4379 				   !amdgpu_device_has_display_hardware(adev)) {
4380 					r = psp_gpu_reset(adev);
4381 		} else {
4382 				tmp = amdgpu_reset_method;
4383 				/* It should do a default reset when loading or reloading the driver,
4384 				 * regardless of the module parameter reset_method.
4385 				 */
4386 				amdgpu_reset_method = AMD_RESET_METHOD_NONE;
4387 				r = amdgpu_asic_reset(adev);
4388 				amdgpu_reset_method = tmp;
4389 		}
4390 
4391 		if (r) {
4392 		  dev_err(adev->dev, "asic reset on init failed\n");
4393 		  goto failed;
4394 		}
4395 	}
4396 
4397 	/* Post card if necessary */
4398 	if (amdgpu_device_need_post(adev)) {
4399 		if (!adev->bios) {
4400 			dev_err(adev->dev, "no vBIOS found\n");
4401 			r = -EINVAL;
4402 			goto failed;
4403 		}
4404 		DRM_INFO("GPU posting now...\n");
4405 		r = amdgpu_device_asic_init(adev);
4406 		if (r) {
4407 			dev_err(adev->dev, "gpu post error!\n");
4408 			goto failed;
4409 		}
4410 	}
4411 
4412 	if (adev->bios) {
4413 		if (adev->is_atom_fw) {
4414 			/* Initialize clocks */
4415 			r = amdgpu_atomfirmware_get_clock_info(adev);
4416 			if (r) {
4417 				dev_err(adev->dev, "amdgpu_atomfirmware_get_clock_info failed\n");
4418 				amdgpu_vf_error_put(adev, AMDGIM_ERROR_VF_ATOMBIOS_GET_CLOCK_FAIL, 0, 0);
4419 				goto failed;
4420 			}
4421 		} else {
4422 			/* Initialize clocks */
4423 			r = amdgpu_atombios_get_clock_info(adev);
4424 			if (r) {
4425 				dev_err(adev->dev, "amdgpu_atombios_get_clock_info failed\n");
4426 				amdgpu_vf_error_put(adev, AMDGIM_ERROR_VF_ATOMBIOS_GET_CLOCK_FAIL, 0, 0);
4427 				goto failed;
4428 			}
4429 			/* init i2c buses */
4430 			if (!amdgpu_device_has_dc_support(adev))
4431 				amdgpu_atombios_i2c_init(adev);
4432 		}
4433 	}
4434 
4435 fence_driver_init:
4436 	/* Fence driver */
4437 	r = amdgpu_fence_driver_sw_init(adev);
4438 	if (r) {
4439 		dev_err(adev->dev, "amdgpu_fence_driver_sw_init failed\n");
4440 		amdgpu_vf_error_put(adev, AMDGIM_ERROR_VF_FENCE_INIT_FAIL, 0, 0);
4441 		goto failed;
4442 	}
4443 
4444 	/* init the mode config */
4445 	drm_mode_config_init(adev_to_drm(adev));
4446 
4447 	r = amdgpu_device_ip_init(adev);
4448 	if (r) {
4449 		dev_err(adev->dev, "amdgpu_device_ip_init failed\n");
4450 		amdgpu_vf_error_put(adev, AMDGIM_ERROR_VF_AMDGPU_INIT_FAIL, 0, 0);
4451 		goto release_ras_con;
4452 	}
4453 
4454 	amdgpu_fence_driver_hw_init(adev);
4455 
4456 	dev_info(adev->dev,
4457 		"SE %d, SH per SE %d, CU per SH %d, active_cu_number %d\n",
4458 			adev->gfx.config.max_shader_engines,
4459 			adev->gfx.config.max_sh_per_se,
4460 			adev->gfx.config.max_cu_per_sh,
4461 			adev->gfx.cu_info.number);
4462 
4463 #ifdef __OpenBSD__
4464 {
4465 	const char *chip_name;
4466 	uint32_t version = adev->ip_versions[GC_HWIP][0];
4467 	int maj, min, rev;
4468 
4469 	switch (adev->asic_type) {
4470 	case CHIP_RAVEN:
4471 		if (adev->apu_flags & AMD_APU_IS_RAVEN2)
4472 			chip_name = "RAVEN2";
4473 		else if (adev->apu_flags & AMD_APU_IS_PICASSO)
4474 			chip_name = "PICASSO";
4475 		else
4476 			chip_name = "RAVEN";
4477 		break;
4478 	case CHIP_RENOIR:
4479 		if (adev->apu_flags & AMD_APU_IS_RENOIR)
4480 			chip_name = "RENOIR";
4481 		else
4482 			chip_name = "GREEN_SARDINE";
4483 		break;
4484 	default:
4485 		chip_name = amdgpu_asic_name[adev->asic_type];
4486 	}
4487 
4488 	printf("%s: %s", adev->self.dv_xname, chip_name);
4489 	/* show graphics/compute ip block version, not set on < GFX9 */
4490 	if (version) {
4491 		maj = IP_VERSION_MAJ(version);
4492 		min = IP_VERSION_MIN(version);
4493 		rev = IP_VERSION_REV(version);
4494 		printf(" GC %d.%d.%d", maj, min, rev);
4495 	}
4496 	printf(" %d CU rev 0x%02x\n", adev->gfx.cu_info.number, adev->rev_id);
4497 }
4498 #endif
4499 
4500 	adev->accel_working = true;
4501 
4502 	amdgpu_vm_check_compute_bug(adev);
4503 
4504 	/* Initialize the buffer migration limit. */
4505 	if (amdgpu_moverate >= 0)
4506 		max_MBps = amdgpu_moverate;
4507 	else
4508 		max_MBps = 8; /* Allow 8 MB/s. */
4509 	/* Get a log2 for easy divisions. */
4510 	adev->mm_stats.log2_max_MBps = ilog2(max(1u, max_MBps));
4511 
4512 	/*
4513 	 * Register gpu instance before amdgpu_device_enable_mgpu_fan_boost.
4514 	 * Otherwise the mgpu fan boost feature will be skipped due to the
4515 	 * gpu instance is counted less.
4516 	 */
4517 	amdgpu_register_gpu_instance(adev);
4518 
4519 	/* enable clockgating, etc. after ib tests, etc. since some blocks require
4520 	 * explicit gating rather than handling it automatically.
4521 	 */
4522 	if (!adev->gmc.xgmi.pending_reset) {
4523 		r = amdgpu_device_ip_late_init(adev);
4524 		if (r) {
4525 			dev_err(adev->dev, "amdgpu_device_ip_late_init failed\n");
4526 			amdgpu_vf_error_put(adev, AMDGIM_ERROR_VF_AMDGPU_LATE_INIT_FAIL, 0, r);
4527 			goto release_ras_con;
4528 		}
4529 		/* must succeed. */
4530 		amdgpu_ras_resume(adev);
4531 		queue_delayed_work(system_wq, &adev->delayed_init_work,
4532 				   msecs_to_jiffies(AMDGPU_RESUME_MS));
4533 	}
4534 
4535 	if (amdgpu_sriov_vf(adev)) {
4536 		amdgpu_virt_release_full_gpu(adev, true);
4537 		flush_delayed_work(&adev->delayed_init_work);
4538 	}
4539 
4540 	/*
4541 	 * Place those sysfs registering after `late_init`. As some of those
4542 	 * operations performed in `late_init` might affect the sysfs
4543 	 * interfaces creating.
4544 	 */
4545 	r = amdgpu_atombios_sysfs_init(adev);
4546 	if (r)
4547 		drm_err(&adev->ddev,
4548 			"registering atombios sysfs failed (%d).\n", r);
4549 
4550 	r = amdgpu_pm_sysfs_init(adev);
4551 	if (r)
4552 		DRM_ERROR("registering pm sysfs failed (%d).\n", r);
4553 
4554 	r = amdgpu_ucode_sysfs_init(adev);
4555 	if (r) {
4556 		adev->ucode_sysfs_en = false;
4557 		DRM_ERROR("Creating firmware sysfs failed (%d).\n", r);
4558 	} else
4559 		adev->ucode_sysfs_en = true;
4560 
4561 	r = sysfs_create_files(&adev->dev->kobj, amdgpu_dev_attributes);
4562 	if (r)
4563 		dev_err(adev->dev, "Could not create amdgpu device attr\n");
4564 
4565 	r = devm_device_add_group(adev->dev, &amdgpu_board_attrs_group);
4566 	if (r)
4567 		dev_err(adev->dev,
4568 			"Could not create amdgpu board attributes\n");
4569 
4570 	amdgpu_fru_sysfs_init(adev);
4571 	amdgpu_reg_state_sysfs_init(adev);
4572 
4573 	if (IS_ENABLED(CONFIG_PERF_EVENTS))
4574 		r = amdgpu_pmu_init(adev);
4575 	if (r)
4576 		dev_err(adev->dev, "amdgpu_pmu_init failed\n");
4577 
4578 	/* Have stored pci confspace at hand for restore in sudden PCI error */
4579 	if (amdgpu_device_cache_pci_state(adev->pdev))
4580 		pci_restore_state(pdev);
4581 
4582 	/* if we have > 1 VGA cards, then disable the amdgpu VGA resources */
4583 	/* this will fail for cards that aren't VGA class devices, just
4584 	 * ignore it
4585 	 */
4586 #ifdef notyet
4587 	if ((adev->pdev->class >> 8) == PCI_CLASS_DISPLAY_VGA)
4588 		vga_client_register(adev->pdev, amdgpu_device_vga_set_decode);
4589 #endif
4590 
4591 	px = amdgpu_device_supports_px(ddev);
4592 
4593 	if (px || (!dev_is_removable(&adev->pdev->dev) &&
4594 				apple_gmux_detect(NULL, NULL)))
4595 		vga_switcheroo_register_client(adev->pdev,
4596 					       &amdgpu_switcheroo_ops, px);
4597 
4598 	if (px)
4599 		vga_switcheroo_init_domain_pm_ops(adev->dev, &adev->vga_pm_domain);
4600 
4601 	if (adev->gmc.xgmi.pending_reset)
4602 		queue_delayed_work(system_wq, &mgpu_info.delayed_reset_work,
4603 				   msecs_to_jiffies(AMDGPU_RESUME_MS));
4604 
4605 	amdgpu_device_check_iommu_direct_map(adev);
4606 
4607 	return 0;
4608 
4609 release_ras_con:
4610 	if (amdgpu_sriov_vf(adev))
4611 		amdgpu_virt_release_full_gpu(adev, true);
4612 
4613 	/* failed in exclusive mode due to timeout */
4614 	if (amdgpu_sriov_vf(adev) &&
4615 		!amdgpu_sriov_runtime(adev) &&
4616 		amdgpu_virt_mmio_blocked(adev) &&
4617 		!amdgpu_virt_wait_reset(adev)) {
4618 		dev_err(adev->dev, "VF exclusive mode timeout\n");
4619 		/* Don't send request since VF is inactive. */
4620 		adev->virt.caps &= ~AMDGPU_SRIOV_CAPS_RUNTIME;
4621 		adev->virt.ops = NULL;
4622 		r = -EAGAIN;
4623 	}
4624 	amdgpu_release_ras_context(adev);
4625 
4626 failed:
4627 	amdgpu_vf_error_trans_all(adev);
4628 
4629 	return r;
4630 }
4631 
/*
 * amdgpu_device_unmap_mmio - drop the device's BAR mappings
 *
 * Tears down the doorbell mapping, the register (rmmio) mapping and, if
 * present, the visible VRAM aperture mapping, then removes the
 * write-combining setup for the aperture unless the device is connected to
 * the CPU over XGMI or is an app APU.  The Linux and non-Linux builds use
 * different unmap primitives.  Called from amdgpu_device_fini_hw() when the
 * DRM device has been unplugged.
 */
static void amdgpu_device_unmap_mmio(struct amdgpu_device *adev)
{
	STUB();
#ifdef notyet

	/* Clear all CPU mappings pointing to this device */
	unmap_mapping_range(adev->ddev.anon_inode->i_mapping, 0, 0, 1);
#endif

	/* Unmap all mapped bars - Doorbell, registers and VRAM */
	amdgpu_doorbell_fini(adev);

#ifdef __linux__
	iounmap(adev->rmmio);
	adev->rmmio = NULL;
	if (adev->mman.aper_base_kaddr)
		iounmap(adev->mman.aper_base_kaddr);
	adev->mman.aper_base_kaddr = NULL;
#else
	/* rmmio_size is zeroed afterwards so a second call skips the unmap. */
	if (adev->rmmio_size > 0)
		bus_space_unmap(adev->rmmio_bst, adev->rmmio_bsh,
		    adev->rmmio_size);
	adev->rmmio_size = 0;
	adev->rmmio = NULL;
	if (adev->mman.aper_base_kaddr)
		bus_space_unmap(adev->memt, adev->mman.aper_bsh,
		    adev->gmc.visible_vram_size);
	adev->mman.aper_base_kaddr = NULL;
#endif

	/* Memory manager related */
	if (!adev->gmc.xgmi.connected_to_cpu && !adev->gmc.is_app_apu) {
#ifdef __linux__
		arch_phys_wc_del(adev->gmc.vram_mtrr);
		arch_io_free_memtype_wc(adev->gmc.aper_base, adev->gmc.aper_size);
#else
		drm_mtrr_del(0, adev->gmc.aper_base, adev->gmc.aper_size, DRM_MTRR_WC);
#endif
	}
}
4672 
/**
 * amdgpu_device_fini_hw - tear down the driver
 *
 * @adev: amdgpu_device pointer
 *
 * Hardware-facing half of driver teardown (all asics), called at driver
 * shutdown.  Waits for outstanding init/TTM work, marks the device as
 * shutting down, disables interrupts and display, removes the sysfs
 * entries, runs the early IP fini pass and clears DMA mappings.  If the DRM
 * device has been unplugged the MMIO mappings are dropped here as well.
 */
void amdgpu_device_fini_hw(struct amdgpu_device *adev)
{
	dev_info(adev->dev, "amdgpu: finishing device.\n");
	/* Let a pending delayed init run to completion before tearing down */
	flush_delayed_work(&adev->delayed_init_work);

	if (adev->mman.initialized)
		drain_workqueue(adev->mman.bdev.wq);
	/* From here on amdgpu_device_suspend() returns early (it tests this) */
	adev->shutdown = true;

	/* make sure IB test finished before entering exclusive mode
	 * to avoid preemption on IB test
	 */
	if (amdgpu_sriov_vf(adev)) {
		amdgpu_virt_request_full_gpu(adev, false);
		amdgpu_virt_fini_data_exchange(adev);
	}

	/* disable all interrupts */
	amdgpu_irq_disable_all(adev);
	/* Shut the display down via the legacy or the atomic helper */
	if (adev->mode_info.mode_config_initialized) {
		if (!drm_drv_uses_atomic_modeset(adev_to_drm(adev)))
			drm_helper_force_disable_all(adev_to_drm(adev));
		else
			drm_atomic_helper_shutdown(adev_to_drm(adev));
	}
	amdgpu_fence_driver_hw_fini(adev);

	/* Remove sysfs entries; pm and ucode ones only if they were created */
	if (adev->pm.sysfs_initialized)
		amdgpu_pm_sysfs_fini(adev);
	if (adev->ucode_sysfs_en)
		amdgpu_ucode_sysfs_fini(adev);
	sysfs_remove_files(&adev->dev->kobj, amdgpu_dev_attributes);
	amdgpu_fru_sysfs_fini(adev);

	amdgpu_reg_state_sysfs_fini(adev);

	/* disable ras feature must before hw fini */
	amdgpu_ras_pre_fini(adev);

	amdgpu_ttm_set_buffer_funcs_status(adev, false);

	amdgpu_device_ip_fini_early(adev);

	amdgpu_irq_fini_hw(adev);

	if (adev->mman.initialized)
		ttm_device_clear_dma_mappings(&adev->mman.bdev);

	amdgpu_gart_dummy_page_fini(adev);

	/* On hot-unplug the BAR mappings are released now, not in fini_sw */
	if (drm_dev_is_unplugged(adev_to_drm(adev)))
		amdgpu_device_unmap_mmio(adev);

}
4735 
/**
 * amdgpu_device_fini_sw - release the software state of the device
 *
 * @adev: amdgpu_device pointer
 *
 * Runs the IP block fini pass and frees driver-owned allocations (gpu_info
 * firmware, BIOS image, FRU info, cached PCI state), unregisters the
 * vga_switcheroo/VGA clients and drops the reset domain reference.  The
 * register mapping and doorbell are released here only while the DRM device
 * can still be entered (drm_dev_enter() succeeds).
 */
void amdgpu_device_fini_sw(struct amdgpu_device *adev)
{
	int idx;
	bool px;

	amdgpu_device_ip_fini(adev);
	amdgpu_fence_driver_sw_fini(adev);
	amdgpu_ucode_release(&adev->firmware.gpu_info_fw);
	adev->accel_working = false;
	/* Drop the reference held on the current gang submit fence */
	dma_fence_put(rcu_dereference_protected(adev->gang_submit, true));

	amdgpu_reset_fini(adev);

	/* free i2c buses */
	if (!amdgpu_device_has_dc_support(adev))
		amdgpu_i2c_fini(adev);

	if (amdgpu_emu_mode != 1)
		amdgpu_atombios_fini(adev);

	kfree(adev->bios);
	adev->bios = NULL;

	kfree(adev->fru_info);
	adev->fru_info = NULL;

	px = amdgpu_device_supports_px(adev_to_drm(adev));

	/*
	 * Unregister from vga_switcheroo for PX devices, or for non-removable
	 * devices when an Apple gmux is detected.
	 */
	if (px || (!dev_is_removable(&adev->pdev->dev) &&
				apple_gmux_detect(NULL, NULL)))
		vga_switcheroo_unregister_client(adev->pdev);

	if (px)
		vga_switcheroo_fini_domain_pm_ops(adev->dev);

	/* Only VGA-class devices are unregistered as a VGA client */
	if ((adev->pdev->class >> 8) == PCI_CLASS_DISPLAY_VGA)
		vga_client_unregister(adev->pdev);

	/*
	 * Unmap registers and doorbell only inside a drm_dev_enter() section;
	 * when it fails this block is skipped entirely.
	 */
	if (drm_dev_enter(adev_to_drm(adev), &idx)) {
#ifdef __linux__
		iounmap(adev->rmmio);
		adev->rmmio = NULL;
#else
		if (adev->rmmio_size > 0)
			bus_space_unmap(adev->rmmio_bst, adev->rmmio_bsh,
			    adev->rmmio_size);
		adev->rmmio_size = 0;
		adev->rmmio = NULL;
#endif
		amdgpu_doorbell_fini(adev);
		drm_dev_exit(idx);
	}

	if (IS_ENABLED(CONFIG_PERF_EVENTS))
		amdgpu_pmu_fini(adev);
	if (adev->mman.discovery_bin)
		amdgpu_discovery_fini(adev);

	amdgpu_reset_put_reset_domain(adev->reset_domain);
	adev->reset_domain = NULL;

	kfree(adev->pci_state);

}
4800 
4801 /**
4802  * amdgpu_device_evict_resources - evict device resources
4803  * @adev: amdgpu device object
4804  *
4805  * Evicts all ttm device resources(vram BOs, gart table) from the lru list
4806  * of the vram memory type. Mainly used for evicting device resources
4807  * at suspend time.
4808  *
4809  */
amdgpu_device_evict_resources(struct amdgpu_device * adev)4810 static int amdgpu_device_evict_resources(struct amdgpu_device *adev)
4811 {
4812 	int ret;
4813 
4814 	/* No need to evict vram on APUs for suspend to ram or s2idle */
4815 	if ((adev->in_s3 || adev->in_s0ix) && (adev->flags & AMD_IS_APU))
4816 		return 0;
4817 
4818 	ret = amdgpu_ttm_evict_resources(adev, TTM_PL_VRAM);
4819 	if (ret)
4820 		DRM_WARN("evicting device resources failed\n");
4821 	return ret;
4822 }
4823 
4824 /*
4825  * Suspend & resume.
4826  */
4827 /**
4828  * amdgpu_device_prepare - prepare for device suspend
4829  *
4830  * @dev: drm dev pointer
4831  *
4832  * Prepare to put the hw in the suspend state (all asics).
4833  * Returns 0 for success or an error on failure.
4834  * Called at driver suspend.
4835  */
amdgpu_device_prepare(struct drm_device * dev)4836 int amdgpu_device_prepare(struct drm_device *dev)
4837 {
4838 	struct amdgpu_device *adev = drm_to_adev(dev);
4839 	int i, r;
4840 
4841 	amdgpu_choose_low_power_state(adev);
4842 
4843 	if (dev->switch_power_state == DRM_SWITCH_POWER_OFF)
4844 		return 0;
4845 
4846 	/* Evict the majority of BOs before starting suspend sequence */
4847 	r = amdgpu_device_evict_resources(adev);
4848 	if (r)
4849 		goto unprepare;
4850 
4851 	flush_delayed_work(&adev->gfx.gfx_off_delay_work);
4852 
4853 	for (i = 0; i < adev->num_ip_blocks; i++) {
4854 		if (!adev->ip_blocks[i].status.valid)
4855 			continue;
4856 		if (!adev->ip_blocks[i].version->funcs->prepare_suspend)
4857 			continue;
4858 		r = adev->ip_blocks[i].version->funcs->prepare_suspend((void *)adev);
4859 		if (r)
4860 			goto unprepare;
4861 	}
4862 
4863 	return 0;
4864 
4865 unprepare:
4866 	adev->in_s0ix = adev->in_s3 = false;
4867 
4868 	return r;
4869 }
4870 
/**
 * amdgpu_device_suspend - initiate device suspend
 *
 * @dev: drm dev pointer
 * @fbcon : notify the fbdev of suspend
 *
 * Puts the hw in the suspend state (all asics).
 * Returns 0 for success or an error on failure.
 * Called at driver suspend.
 *
 * NOTE(review): the error returns below leave adev->in_suspend set, and for
 * SR-IOV the eviction failure path returns without releasing full GPU
 * access - confirm callers cope with that.
 */
int amdgpu_device_suspend(struct drm_device *dev, bool fbcon)
{
	struct amdgpu_device *adev = drm_to_adev(dev);
	int r = 0;

	/* Nothing to do once amdgpu_device_fini_hw() has marked shutdown */
	if (adev->shutdown)
		return 0;

#ifdef notyet
	if (dev->switch_power_state == DRM_SWITCH_POWER_OFF)
		return 0;
#endif

	adev->in_suspend = true;

	/* A VF stops the data exchange and takes full GPU access first */
	if (amdgpu_sriov_vf(adev)) {
		amdgpu_virt_fini_data_exchange(adev);
		r = amdgpu_virt_request_full_gpu(adev, false);
		if (r)
			return r;
	}

	/* Failure here is only warned about, suspend continues */
	if (amdgpu_acpi_smart_shift_update(dev, AMDGPU_SS_DEV_D3))
		DRM_WARN("smart shift update failed\n");

	if (fbcon)
		drm_fb_helper_set_suspend_unlocked(adev_to_drm(adev)->fb_helper, true);

	cancel_delayed_work_sync(&adev->delayed_init_work);

	amdgpu_ras_suspend(adev);

	amdgpu_device_ip_suspend_phase1(adev);

	/* KFD is not suspended for s0ix */
	if (!adev->in_s0ix)
		amdgpu_amdkfd_suspend(adev, adev->in_runpm);

	r = amdgpu_device_evict_resources(adev);
	if (r)
		return r;

	amdgpu_ttm_set_buffer_funcs_status(adev, false);

	amdgpu_fence_driver_hw_fini(adev);

	amdgpu_device_ip_suspend_phase2(adev);

	if (amdgpu_sriov_vf(adev))
		amdgpu_virt_release_full_gpu(adev, false);

	r = amdgpu_dpm_notify_rlc_state(adev, false);
	if (r)
		return r;

	return 0;
}
4937 
/**
 * amdgpu_device_resume - initiate device resume
 *
 * @dev: drm dev pointer
 * @fbcon : notify the fbdev of resume
 *
 * Bring the hw back to operating state (all asics).
 * Returns 0 for success or an error on failure.
 * Called at driver resume.
 *
 * On failure of the IP resume, KFD resume or late init steps the function
 * still runs the SR-IOV release at the exit label before returning the
 * error; adev->in_suspend is only cleared on the success path.
 */
int amdgpu_device_resume(struct drm_device *dev, bool fbcon)
{
	struct amdgpu_device *adev = drm_to_adev(dev);
	int r = 0;

	/* A VF needs full GPU access for the whole re-initialization */
	if (amdgpu_sriov_vf(adev)) {
		r = amdgpu_virt_request_full_gpu(adev, true);
		if (r)
			return r;
	}

#ifdef notyet
	if (dev->switch_power_state == DRM_SWITCH_POWER_OFF)
		return 0;
#endif

	if (adev->in_s0ix)
		amdgpu_dpm_gfx_state_change(adev, sGpuChangeState_D0Entry);

	/* post card */
	if (amdgpu_device_need_post(adev)) {
		r = amdgpu_device_asic_init(adev);
		/* Only logged: r is overwritten by the IP resume result below */
		if (r)
			dev_err(adev->dev, "amdgpu asic init failed\n");
	}

	r = amdgpu_device_ip_resume(adev);

	if (r) {
		dev_err(adev->dev, "amdgpu_device_ip_resume failed (%d).\n", r);
		goto exit;
	}

	/* Mirrors the suspend path, which skips KFD for s0ix */
	if (!adev->in_s0ix) {
		r = amdgpu_amdkfd_resume(adev, adev->in_runpm);
		if (r)
			goto exit;
	}

	r = amdgpu_device_ip_late_init(adev);
	if (r)
		goto exit;

	queue_delayed_work(system_wq, &adev->delayed_init_work,
			   msecs_to_jiffies(AMDGPU_RESUME_MS));
exit:
	/* Reached on both success and failure */
	if (amdgpu_sriov_vf(adev)) {
		amdgpu_virt_init_data_exchange(adev);
		amdgpu_virt_release_full_gpu(adev, true);
	}

	if (r)
		return r;

	/* Make sure IB tests flushed */
	flush_delayed_work(&adev->delayed_init_work);

	if (fbcon)
		drm_fb_helper_set_suspend_unlocked(adev_to_drm(adev)->fb_helper, false);

	amdgpu_ras_resume(adev);

	if (adev->mode_info.num_crtc) {
		/*
		 * Most of the connector probing functions try to acquire runtime pm
		 * refs to ensure that the GPU is powered on when connector polling is
		 * performed. Since we're calling this from a runtime PM callback,
		 * trying to acquire rpm refs will cause us to deadlock.
		 *
		 * Since we're guaranteed to be holding the rpm lock, it's safe to
		 * temporarily disable the rpm helpers so this doesn't deadlock us.
		 */
#if defined(CONFIG_PM) && defined(__linux__)
		dev->dev->power.disable_depth++;
#endif
		/* Without DC use the hpd irq helper, otherwise send a hotplug event */
		if (!adev->dc_enabled)
			drm_helper_hpd_irq_event(dev);
		else
			drm_kms_helper_hotplug_event(dev);
#if defined(CONFIG_PM) && defined(__linux__)
		dev->dev->power.disable_depth--;
#endif
	}
	adev->in_suspend = false;

	if (adev->enable_mes)
		amdgpu_mes_self_test(adev);

	/* Failure here is only warned about, resume still succeeds */
	if (amdgpu_acpi_smart_shift_update(dev, AMDGPU_SS_DEV_D0))
		DRM_WARN("smart shift update failed\n");

	return 0;
}
5041 
5042 /**
5043  * amdgpu_device_ip_check_soft_reset - did soft reset succeed
5044  *
5045  * @adev: amdgpu_device pointer
5046  *
5047  * The list of all the hardware IPs that make up the asic is walked and
5048  * the check_soft_reset callbacks are run.  check_soft_reset determines
5049  * if the asic is still hung or not.
5050  * Returns true if any of the IPs are still in a hung state, false if not.
5051  */
amdgpu_device_ip_check_soft_reset(struct amdgpu_device * adev)5052 static bool amdgpu_device_ip_check_soft_reset(struct amdgpu_device *adev)
5053 {
5054 	int i;
5055 	bool asic_hang = false;
5056 
5057 	if (amdgpu_sriov_vf(adev))
5058 		return true;
5059 
5060 	if (amdgpu_asic_need_full_reset(adev))
5061 		return true;
5062 
5063 	for (i = 0; i < adev->num_ip_blocks; i++) {
5064 		if (!adev->ip_blocks[i].status.valid)
5065 			continue;
5066 		if (adev->ip_blocks[i].version->funcs->check_soft_reset)
5067 			adev->ip_blocks[i].status.hang =
5068 				adev->ip_blocks[i].version->funcs->check_soft_reset(adev);
5069 		if (adev->ip_blocks[i].status.hang) {
5070 			dev_info(adev->dev, "IP block:%s is hung!\n", adev->ip_blocks[i].version->funcs->name);
5071 			asic_hang = true;
5072 		}
5073 	}
5074 	return asic_hang;
5075 }
5076 
5077 /**
5078  * amdgpu_device_ip_pre_soft_reset - prepare for soft reset
5079  *
5080  * @adev: amdgpu_device pointer
5081  *
5082  * The list of all the hardware IPs that make up the asic is walked and the
5083  * pre_soft_reset callbacks are run if the block is hung.  pre_soft_reset
5084  * handles any IP specific hardware or software state changes that are
5085  * necessary for a soft reset to succeed.
5086  * Returns 0 on success, negative error code on failure.
5087  */
amdgpu_device_ip_pre_soft_reset(struct amdgpu_device * adev)5088 static int amdgpu_device_ip_pre_soft_reset(struct amdgpu_device *adev)
5089 {
5090 	int i, r = 0;
5091 
5092 	for (i = 0; i < adev->num_ip_blocks; i++) {
5093 		if (!adev->ip_blocks[i].status.valid)
5094 			continue;
5095 		if (adev->ip_blocks[i].status.hang &&
5096 		    adev->ip_blocks[i].version->funcs->pre_soft_reset) {
5097 			r = adev->ip_blocks[i].version->funcs->pre_soft_reset(adev);
5098 			if (r)
5099 				return r;
5100 		}
5101 	}
5102 
5103 	return 0;
5104 }
5105 
5106 /**
5107  * amdgpu_device_ip_need_full_reset - check if a full asic reset is needed
5108  *
5109  * @adev: amdgpu_device pointer
5110  *
5111  * Some hardware IPs cannot be soft reset.  If they are hung, a full gpu
5112  * reset is necessary to recover.
5113  * Returns true if a full asic reset is required, false if not.
5114  */
amdgpu_device_ip_need_full_reset(struct amdgpu_device * adev)5115 static bool amdgpu_device_ip_need_full_reset(struct amdgpu_device *adev)
5116 {
5117 	int i;
5118 
5119 	if (amdgpu_asic_need_full_reset(adev))
5120 		return true;
5121 
5122 	for (i = 0; i < adev->num_ip_blocks; i++) {
5123 		if (!adev->ip_blocks[i].status.valid)
5124 			continue;
5125 		if ((adev->ip_blocks[i].version->type == AMD_IP_BLOCK_TYPE_GMC) ||
5126 		    (adev->ip_blocks[i].version->type == AMD_IP_BLOCK_TYPE_SMC) ||
5127 		    (adev->ip_blocks[i].version->type == AMD_IP_BLOCK_TYPE_ACP) ||
5128 		    (adev->ip_blocks[i].version->type == AMD_IP_BLOCK_TYPE_DCE) ||
5129 		     adev->ip_blocks[i].version->type == AMD_IP_BLOCK_TYPE_PSP) {
5130 			if (adev->ip_blocks[i].status.hang) {
5131 				dev_info(adev->dev, "Some block need full reset!\n");
5132 				return true;
5133 			}
5134 		}
5135 	}
5136 	return false;
5137 }
5138 
5139 /**
5140  * amdgpu_device_ip_soft_reset - do a soft reset
5141  *
5142  * @adev: amdgpu_device pointer
5143  *
5144  * The list of all the hardware IPs that make up the asic is walked and the
5145  * soft_reset callbacks are run if the block is hung.  soft_reset handles any
5146  * IP specific hardware or software state changes that are necessary to soft
5147  * reset the IP.
5148  * Returns 0 on success, negative error code on failure.
5149  */
amdgpu_device_ip_soft_reset(struct amdgpu_device * adev)5150 static int amdgpu_device_ip_soft_reset(struct amdgpu_device *adev)
5151 {
5152 	int i, r = 0;
5153 
5154 	for (i = 0; i < adev->num_ip_blocks; i++) {
5155 		if (!adev->ip_blocks[i].status.valid)
5156 			continue;
5157 		if (adev->ip_blocks[i].status.hang &&
5158 		    adev->ip_blocks[i].version->funcs->soft_reset) {
5159 			r = adev->ip_blocks[i].version->funcs->soft_reset(adev);
5160 			if (r)
5161 				return r;
5162 		}
5163 	}
5164 
5165 	return 0;
5166 }
5167 
5168 /**
5169  * amdgpu_device_ip_post_soft_reset - clean up from soft reset
5170  *
5171  * @adev: amdgpu_device pointer
5172  *
5173  * The list of all the hardware IPs that make up the asic is walked and the
5174  * post_soft_reset callbacks are run if the asic was hung.  post_soft_reset
5175  * handles any IP specific hardware or software state changes that are
5176  * necessary after the IP has been soft reset.
5177  * Returns 0 on success, negative error code on failure.
5178  */
amdgpu_device_ip_post_soft_reset(struct amdgpu_device * adev)5179 static int amdgpu_device_ip_post_soft_reset(struct amdgpu_device *adev)
5180 {
5181 	int i, r = 0;
5182 
5183 	for (i = 0; i < adev->num_ip_blocks; i++) {
5184 		if (!adev->ip_blocks[i].status.valid)
5185 			continue;
5186 		if (adev->ip_blocks[i].status.hang &&
5187 		    adev->ip_blocks[i].version->funcs->post_soft_reset)
5188 			r = adev->ip_blocks[i].version->funcs->post_soft_reset(adev);
5189 		if (r)
5190 			goto unprepare;
5191 	}
5192 
5193 	return 0;
5194 
5195 unprepare:
5196 	adev->in_s0ix = adev->in_s3 = false;
5197 
5198 	return r;
5199 }
5200 
/**
 * amdgpu_device_reset_sriov - reset ASIC for SR-IOV vf
 *
 * @adev: amdgpu_device pointer
 * @reset_context: amdgpu reset context pointer
 *
 * do VF FLR and reinitialize Asic
 * return 0 means succeeded otherwise failed
 *
 * NOTE(review): the error returns after full GPU access has been obtained
 * do not call amdgpu_virt_release_full_gpu() - confirm the caller handles
 * the release on failure.
 */
static int amdgpu_device_reset_sriov(struct amdgpu_device *adev,
				     struct amdgpu_reset_context *reset_context)
{
	int r;
	struct amdgpu_hive_info *hive = NULL;

	/*
	 * Host-initiated FLR: signal readiness (unless the RAS fed status is
	 * set), wait for the reset, then request full GPU access.  Otherwise
	 * the VF asks for the reset itself.
	 */
	if (test_bit(AMDGPU_HOST_FLR, &reset_context->flags)) {
		if (!amdgpu_ras_get_fed_status(adev))
			amdgpu_virt_ready_to_reset(adev);
		amdgpu_virt_wait_reset(adev);
		clear_bit(AMDGPU_HOST_FLR, &reset_context->flags);
		r = amdgpu_virt_request_full_gpu(adev, true);
	} else {
		r = amdgpu_virt_reset_gpu(adev);
	}
	if (r)
		return r;

	amdgpu_ras_set_fed(adev, false);
	amdgpu_irq_gpu_reset_resume_helper(adev);

	/* some sw clean up VF needs to do before recover */
	amdgpu_virt_post_reset(adev);

	/* Resume IP prior to SMC */
	r = amdgpu_device_ip_reinit_early_sriov(adev);
	if (r)
		return r;

	amdgpu_virt_init_data_exchange(adev);

	r = amdgpu_device_fw_loading(adev);
	if (r)
		return r;

	/* now we are okay to resume SMC/CP/SDMA */
	r = amdgpu_device_ip_reinit_late_sriov(adev);
	if (r)
		return r;

	hive = amdgpu_get_xgmi_hive(adev);
	/* Update PSP FW topology after reset */
	if (hive && adev->gmc.xgmi.num_physical_nodes > 1)
		r = amdgpu_xgmi_update_topology(hive, adev);
	/* The hive reference is dropped before the result is checked */
	if (hive)
		amdgpu_put_xgmi_hive(hive);
	if (r)
		return r;

	r = amdgpu_ib_ring_tests(adev);
	if (r)
		return r;

	if (adev->virt.gim_feature & AMDGIM_FEATURE_GIM_FLR_VRAMLOST)
		amdgpu_inc_vram_lost(adev);

	/* need to be called during full access so we can't do it later like
	 * bare-metal does.
	 */
	amdgpu_amdkfd_post_reset(adev);
	amdgpu_virt_release_full_gpu(adev, true);

	/* Aldebaran and gfx_11_0_3 support ras in SRIOV, so need resume ras during reset */
	if (amdgpu_ip_version(adev, GC_HWIP, 0) == IP_VERSION(9, 4, 2) ||
	    amdgpu_ip_version(adev, GC_HWIP, 0) == IP_VERSION(9, 4, 3) ||
	    amdgpu_ip_version(adev, GC_HWIP, 0) == IP_VERSION(9, 4, 4) ||
	    amdgpu_ip_version(adev, GC_HWIP, 0) == IP_VERSION(11, 0, 3))
		amdgpu_ras_resume(adev);
	return 0;
}
5280 
5281 /**
5282  * amdgpu_device_has_job_running - check if there is any job in mirror list
5283  *
5284  * @adev: amdgpu_device pointer
5285  *
5286  * check if there is any job in mirror list
5287  */
amdgpu_device_has_job_running(struct amdgpu_device * adev)5288 bool amdgpu_device_has_job_running(struct amdgpu_device *adev)
5289 {
5290 	int i;
5291 	struct drm_sched_job *job;
5292 
5293 	for (i = 0; i < AMDGPU_MAX_RINGS; ++i) {
5294 		struct amdgpu_ring *ring = adev->rings[i];
5295 
5296 		if (!amdgpu_ring_sched_ready(ring))
5297 			continue;
5298 
5299 		spin_lock(&ring->sched.job_list_lock);
5300 		job = list_first_entry_or_null(&ring->sched.pending_list,
5301 					       struct drm_sched_job, list);
5302 		spin_unlock(&ring->sched.job_list_lock);
5303 		if (job)
5304 			return true;
5305 	}
5306 	return false;
5307 }
5308 
5309 /**
5310  * amdgpu_device_should_recover_gpu - check if we should try GPU recovery
5311  *
5312  * @adev: amdgpu_device pointer
5313  *
5314  * Check amdgpu_gpu_recovery and SRIOV status to see if we should try to recover
5315  * a hung GPU.
5316  */
amdgpu_device_should_recover_gpu(struct amdgpu_device * adev)5317 bool amdgpu_device_should_recover_gpu(struct amdgpu_device *adev)
5318 {
5319 
5320 	if (amdgpu_gpu_recovery == 0)
5321 		goto disabled;
5322 
5323 	/* Skip soft reset check in fatal error mode */
5324 	if (!amdgpu_ras_is_poison_mode_supported(adev))
5325 		return true;
5326 
5327 	if (amdgpu_sriov_vf(adev))
5328 		return true;
5329 
5330 	if (amdgpu_gpu_recovery == -1) {
5331 		switch (adev->asic_type) {
5332 #ifdef CONFIG_DRM_AMDGPU_SI
5333 		case CHIP_VERDE:
5334 		case CHIP_TAHITI:
5335 		case CHIP_PITCAIRN:
5336 		case CHIP_OLAND:
5337 		case CHIP_HAINAN:
5338 #endif
5339 #ifdef CONFIG_DRM_AMDGPU_CIK
5340 		case CHIP_KAVERI:
5341 		case CHIP_KABINI:
5342 		case CHIP_MULLINS:
5343 #endif
5344 		case CHIP_CARRIZO:
5345 		case CHIP_STONEY:
5346 		case CHIP_CYAN_SKILLFISH:
5347 			goto disabled;
5348 		default:
5349 			break;
5350 		}
5351 	}
5352 
5353 	return true;
5354 
5355 disabled:
5356 		dev_info(adev->dev, "GPU recovery disabled.\n");
5357 		return false;
5358 }
5359 
amdgpu_device_mode1_reset(struct amdgpu_device * adev)5360 int amdgpu_device_mode1_reset(struct amdgpu_device *adev)
5361 {
5362 	u32 i;
5363 	int ret = 0;
5364 
5365 	amdgpu_atombios_scratch_regs_engine_hung(adev, true);
5366 
5367 	dev_info(adev->dev, "GPU mode1 reset\n");
5368 
5369 	/* Cache the state before bus master disable. The saved config space
5370 	 * values are used in other cases like restore after mode-2 reset.
5371 	 */
5372 	amdgpu_device_cache_pci_state(adev->pdev);
5373 
5374 	/* disable BM */
5375 	pci_clear_master(adev->pdev);
5376 
5377 	if (amdgpu_dpm_is_mode1_reset_supported(adev)) {
5378 		dev_info(adev->dev, "GPU smu mode1 reset\n");
5379 		ret = amdgpu_dpm_mode1_reset(adev);
5380 	} else {
5381 		dev_info(adev->dev, "GPU psp mode1 reset\n");
5382 		ret = psp_gpu_reset(adev);
5383 	}
5384 
5385 	if (ret)
5386 		goto mode1_reset_failed;
5387 
5388 	amdgpu_device_load_pci_state(adev->pdev);
5389 	ret = amdgpu_psp_wait_for_bootloader(adev);
5390 	if (ret)
5391 		goto mode1_reset_failed;
5392 
5393 	/* wait for asic to come out of reset */
5394 	for (i = 0; i < adev->usec_timeout; i++) {
5395 		u32 memsize = adev->nbio.funcs->get_memsize(adev);
5396 
5397 		if (memsize != 0xffffffff)
5398 			break;
5399 		udelay(1);
5400 	}
5401 
5402 	if (i >= adev->usec_timeout) {
5403 		ret = -ETIMEDOUT;
5404 		goto mode1_reset_failed;
5405 	}
5406 
5407 	amdgpu_atombios_scratch_regs_engine_hung(adev, false);
5408 
5409 	return 0;
5410 
5411 mode1_reset_failed:
5412 	dev_err(adev->dev, "GPU mode1 reset failed\n");
5413 	return ret;
5414 }
5415 
/**
 * amdgpu_device_pre_asic_reset - prepare a device for an asic reset
 *
 * @adev: amdgpu_device pointer
 * @reset_context: amdgpu reset context pointer
 *
 * Force-completes the hardware fences of all ready rings, bumps the karma
 * of the offending job (if it belongs to this device and has a VM) and lets
 * a reset handler prepare the hw context.  When no handler is implemented
 * (-EOPNOTSUPP) and the device is not an SR-IOV VF, it decides between soft
 * and full reset, tries the soft reset, dumps the IP state and suspends the
 * IP blocks if a full reset is needed.  The decision is written back to the
 * AMDGPU_NEED_FULL_RESET bit of @reset_context->flags.
 *
 * Returns 0 on success, negative error code on failure.
 */
int amdgpu_device_pre_asic_reset(struct amdgpu_device *adev,
				 struct amdgpu_reset_context *reset_context)
{
	int i, r = 0;
	struct amdgpu_job *job = NULL;
	struct amdgpu_device *tmp_adev = reset_context->reset_req_dev;
	bool need_full_reset =
		test_bit(AMDGPU_NEED_FULL_RESET, &reset_context->flags);

	/* The job is only considered on the device that requested the reset */
	if (reset_context->reset_req_dev == adev)
		job = reset_context->job;

	if (amdgpu_sriov_vf(adev))
		amdgpu_virt_pre_reset(adev);

	amdgpu_fence_driver_isr_toggle(adev, true);

	/* block all schedulers and reset given job's ring */
	for (i = 0; i < AMDGPU_MAX_RINGS; ++i) {
		struct amdgpu_ring *ring = adev->rings[i];

		if (!amdgpu_ring_sched_ready(ring))
			continue;

		/* Clear job fence from fence drv to avoid force_completion
		 * leave NULL and vm flush fence in fence drv
		 */
		amdgpu_fence_driver_clear_job_fences(ring);

		/* after all hw jobs are reset, hw fence is meaningless, so force_completion */
		amdgpu_fence_driver_force_completion(ring);
	}

	amdgpu_fence_driver_isr_toggle(adev, false);

	if (job && job->vm)
		drm_sched_increase_karma(&job->base);

	r = amdgpu_reset_prepare_hwcontext(adev, reset_context);
	/* If reset handler not implemented, continue; otherwise return */
	if (r == -EOPNOTSUPP)
		r = 0;
	else
		return r;

	/* Don't suspend on bare metal if we are not going to HW reset the ASIC */
	if (!amdgpu_sriov_vf(adev)) {

		if (!need_full_reset)
			need_full_reset = amdgpu_device_ip_need_full_reset(adev);

		/*
		 * Try a soft reset first.  Only the result of the soft_reset
		 * step and the follow-up hang check decide on the fallback;
		 * the pre/post callback results are not looked at.
		 */
		if (!need_full_reset && amdgpu_gpu_recovery &&
		    amdgpu_device_ip_check_soft_reset(adev)) {
			amdgpu_device_ip_pre_soft_reset(adev);
			r = amdgpu_device_ip_soft_reset(adev);
			amdgpu_device_ip_post_soft_reset(adev);
			if (r || amdgpu_device_ip_check_soft_reset(adev)) {
				dev_info(adev->dev, "soft reset failed, will fallback to full reset!\n");
				need_full_reset = true;
			}
		}

		/* The dump is taken on the requesting device (tmp_adev), not adev */
		if (!test_bit(AMDGPU_SKIP_COREDUMP, &reset_context->flags)) {
			dev_info(tmp_adev->dev, "Dumping IP State\n");
			/* Trigger ip dump before we reset the asic */
			for (i = 0; i < tmp_adev->num_ip_blocks; i++)
				if (tmp_adev->ip_blocks[i].version->funcs->dump_ip_state)
					tmp_adev->ip_blocks[i].version->funcs
						->dump_ip_state((void *)tmp_adev);
			dev_info(tmp_adev->dev, "Dumping IP State Completed\n");
		}

		if (need_full_reset)
			r = amdgpu_device_ip_suspend(adev);
		/* Publish the final decision for the following reset steps */
		if (need_full_reset)
			set_bit(AMDGPU_NEED_FULL_RESET, &reset_context->flags);
		else
			clear_bit(AMDGPU_NEED_FULL_RESET,
				  &reset_context->flags);
	}

	return r;
}
5499 
/**
 * amdgpu_do_asic_reset - reset and re-initialize a list of devices
 *
 * @device_list_handle: list of amdgpu devices (linked via reset_list)
 * @reset_context: amdgpu reset context pointer
 *
 * First offers the reset to a reset handler through the first device of the
 * list.  If no handler is implemented (-EOPNOTSUPP) the default method is
 * used: when a full reset is needed and the hw reset is not skipped, every
 * device is reset (XGMI nodes in parallel through their work items); then
 * each device is re-posted, resumed phase by phase and checked with the IB
 * ring tests.  The AMDGPU_NEED_FULL_RESET bit of @reset_context->flags is
 * updated before returning.
 *
 * Returns 0 on success, -EAGAIN when the IB ring tests fail (a full reset
 * is then requested), or another negative error code on failure.
 */
int amdgpu_do_asic_reset(struct list_head *device_list_handle,
			 struct amdgpu_reset_context *reset_context)
{
	struct amdgpu_device *tmp_adev = NULL;
	bool need_full_reset, skip_hw_reset, vram_lost = false;
	int r = 0;

	/* Try reset handler method first */
	tmp_adev = list_first_entry(device_list_handle, struct amdgpu_device,
				    reset_list);

	reset_context->reset_device_list = device_list_handle;
	r = amdgpu_reset_perform_reset(tmp_adev, reset_context);
	/* If reset handler not implemented, continue; otherwise return */
	if (r == -EOPNOTSUPP)
		r = 0;
	else
		return r;

	/* Reset handler not implemented, use the default method */
	need_full_reset =
		test_bit(AMDGPU_NEED_FULL_RESET, &reset_context->flags);
	skip_hw_reset = test_bit(AMDGPU_SKIP_HW_RESET, &reset_context->flags);

	/*
	 * ASIC reset has to be done on all XGMI hive nodes ASAP
	 * to allow proper links negotiation in FW (within 1 sec)
	 */
	if (!skip_hw_reset && need_full_reset) {
		list_for_each_entry(tmp_adev, device_list_handle, reset_list) {
			/* For XGMI run all resets in parallel to speed up the process */
			if (tmp_adev->gmc.xgmi.num_physical_nodes > 1) {
				tmp_adev->gmc.xgmi.pending_reset = false;
				if (!queue_work(system_unbound_wq, &tmp_adev->xgmi_reset_work))
					r = -EALREADY;
			} else
				r = amdgpu_asic_reset(tmp_adev);

			/*
			 * NOTE(review): this goto jumps to the "out" label that
			 * lives inside the body of the later list_for_each_entry
			 * loop, with tmp_adev still pointing at the failing
			 * device.
			 */
			if (r) {
				dev_err(tmp_adev->dev, "ASIC reset failed with error, %d for drm dev, %s",
					 r, adev_to_drm(tmp_adev)->unique);
				goto out;
			}
		}

		/* For XGMI wait for all resets to complete before proceed */
		if (!r) {
			list_for_each_entry(tmp_adev, device_list_handle, reset_list) {
				if (tmp_adev->gmc.xgmi.num_physical_nodes > 1) {
					flush_work(&tmp_adev->xgmi_reset_work);
					r = tmp_adev->asic_reset_res;
					if (r)
						break;
				}
			}
		}
	}

	/* After a RAS interrupt, reset the MMHUB error counts and clear it */
	if (!r && amdgpu_ras_intr_triggered()) {
		list_for_each_entry(tmp_adev, device_list_handle, reset_list) {
			amdgpu_ras_reset_error_count(tmp_adev, AMDGPU_RAS_BLOCK__MMHUB);
		}

		amdgpu_ras_intr_cleared();
	}

	list_for_each_entry(tmp_adev, device_list_handle, reset_list) {
		if (need_full_reset) {
			/* post card */
			amdgpu_ras_set_fed(tmp_adev, false);
			r = amdgpu_device_asic_init(tmp_adev);
			if (r) {
				dev_warn(tmp_adev->dev, "asic atom init failed!");
			} else {
				dev_info(tmp_adev->dev, "GPU reset succeeded, trying to resume\n");

				r = amdgpu_device_ip_resume_phase1(tmp_adev);
				if (r)
					goto out;

				vram_lost = amdgpu_device_check_vram_lost(tmp_adev);

				if (!test_bit(AMDGPU_SKIP_COREDUMP, &reset_context->flags))
					amdgpu_coredump(tmp_adev, false, vram_lost, reset_context->job);

				if (vram_lost) {
					DRM_INFO("VRAM is lost due to GPU reset!\n");
					amdgpu_inc_vram_lost(tmp_adev);
				}

				/*
				 * NOTE(review): unlike the other steps this
				 * returns directly, skipping the asic_reset_res
				 * and NEED_FULL_RESET bookkeeping at out/end.
				 */
				r = amdgpu_device_fw_loading(tmp_adev);
				if (r)
					return r;

				r = amdgpu_xcp_restore_partition_mode(
					tmp_adev->xcp_mgr);
				if (r)
					goto out;

				r = amdgpu_device_ip_resume_phase2(tmp_adev);
				if (r)
					goto out;

				if (tmp_adev->mman.buffer_funcs_ring->sched.ready)
					amdgpu_ttm_set_buffer_funcs_status(tmp_adev, true);

				r = amdgpu_device_ip_resume_phase3(tmp_adev);
				if (r)
					goto out;

				if (vram_lost)
					amdgpu_device_fill_reset_magic(tmp_adev);

				/*
				 * Add this ASIC as tracked as reset was already
				 * complete successfully.
				 */
				amdgpu_register_gpu_instance(tmp_adev);

				if (!reset_context->hive &&
				    tmp_adev->gmc.xgmi.num_physical_nodes > 1)
					amdgpu_xgmi_add_device(tmp_adev);

				r = amdgpu_device_ip_late_init(tmp_adev);
				if (r)
					goto out;

				drm_fb_helper_set_suspend_unlocked(adev_to_drm(tmp_adev)->fb_helper, false);

				/*
				 * The GPU enters bad state once faulty pages
				 * by ECC has reached the threshold, and ras
				 * recovery is scheduled next. So add one check
				 * here to break recovery if it indeed exceeds
				 * bad page threshold, and remind user to
				 * retire this GPU or setting one bigger
				 * bad_page_threshold value to fix this once
				 * probing driver again.
				 */
				if (!amdgpu_ras_is_rma(tmp_adev)) {
					/* must succeed. */
					amdgpu_ras_resume(tmp_adev);
				} else {
					r = -EINVAL;
					goto out;
				}

				/* Update PSP FW topology after reset */
				if (reset_context->hive &&
				    tmp_adev->gmc.xgmi.num_physical_nodes > 1)
					r = amdgpu_xgmi_update_topology(
						reset_context->hive, tmp_adev);
			}
		}

out:
		/* Per-device epilogue: verify with IB tests or record the error */
		if (!r) {
			amdgpu_irq_gpu_reset_resume_helper(tmp_adev);
			r = amdgpu_ib_ring_tests(tmp_adev);
			if (r) {
				dev_err(tmp_adev->dev, "ib ring test failed (%d).\n", r);
				need_full_reset = true;
				r = -EAGAIN;
				goto end;
			}
		}

		if (r)
			tmp_adev->asic_reset_res = r;
	}

end:
	/* Hand the (possibly escalated) reset decision back to the caller */
	if (need_full_reset)
		set_bit(AMDGPU_NEED_FULL_RESET, &reset_context->flags);
	else
		clear_bit(AMDGPU_NEED_FULL_RESET, &reset_context->flags);
	return r;
}
5678 
amdgpu_device_set_mp1_state(struct amdgpu_device * adev)5679 static void amdgpu_device_set_mp1_state(struct amdgpu_device *adev)
5680 {
5681 
5682 	switch (amdgpu_asic_reset_method(adev)) {
5683 	case AMD_RESET_METHOD_MODE1:
5684 		adev->mp1_state = PP_MP1_STATE_SHUTDOWN;
5685 		break;
5686 	case AMD_RESET_METHOD_MODE2:
5687 		adev->mp1_state = PP_MP1_STATE_RESET;
5688 		break;
5689 	default:
5690 		adev->mp1_state = PP_MP1_STATE_NONE;
5691 		break;
5692 	}
5693 
5694 	pci_dev_put(p);
5695 }
5696 
/**
 * amdgpu_device_unset_mp1_state - put the MP1 state back to NONE
 *
 * @adev: amdgpu_device pointer
 *
 * Calls amdgpu_vf_error_trans_all() and then resets adev->mp1_state to
 * PP_MP1_STATE_NONE.
 */
static void amdgpu_device_unset_mp1_state(struct amdgpu_device *adev)
{
	/* NOTE(review): presumably forwards queued VF error records - confirm */
	amdgpu_vf_error_trans_all(adev);
	adev->mp1_state = PP_MP1_STATE_NONE;
}
5702 
/**
 * amdgpu_device_resume_display_audio - resume the display audio PCI function
 *
 * @adev: amdgpu_device pointer
 *
 * Stubbed on this platform: only STUB() runs unless notyet is defined.
 * The guarded code looks up the PCI device at devfn 1 on the GPU's bus,
 * re-enables runtime PM on it, resumes it and then drops the reference
 * obtained by the lookup.
 */
static void amdgpu_device_resume_display_audio(struct amdgpu_device *adev)
{
	STUB();
#ifdef notyet
	struct pci_dev *p = NULL;

	p = pci_get_domain_bus_and_slot(pci_domain_nr(adev->pdev->bus),
			adev->pdev->bus->number, 1);
	if (p) {
		pm_runtime_enable(&(p->dev));
		pm_runtime_resume(&(p->dev));
	}

	/* balance the reference taken by pci_get_domain_bus_and_slot() */
	pci_dev_put(p);
#endif
}
5717 
/**
 * amdgpu_device_suspend_display_audio - suspend the display audio PCI function
 *
 * @adev: amdgpu_device pointer
 *
 * Only applies when the reset method is BACO or mode1; any other method
 * returns -EINVAL immediately. After that check the function is stubbed
 * and returns -ENOSYS; the code guarded by notyet sits behind that
 * unconditional return.
 *
 * The guarded code looks up the PCI device at devfn 1 on the GPU's bus,
 * repeatedly tries to runtime-suspend it until it succeeds or a deadline
 * passes, and then disables runtime PM on it.
 *
 * Returns 0 on success (guarded path only), otherwise a negative error code.
 */
static int amdgpu_device_suspend_display_audio(struct amdgpu_device *adev)
{
	enum amd_reset_method reset_method;
	struct pci_dev *p = NULL;
	u64 expires;

	/*
	 * For now, only BACO and mode1 reset are confirmed
	 * to suffer the audio issue without proper suspended.
	 */
	reset_method = amdgpu_asic_reset_method(adev);
	if ((reset_method != AMD_RESET_METHOD_BACO) &&
	     (reset_method != AMD_RESET_METHOD_MODE1))
		return -EINVAL;

	STUB();
	return -ENOSYS;
#ifdef notyet

	p = pci_get_domain_bus_and_slot(pci_domain_nr(adev->pdev->bus),
			adev->pdev->bus->number, 1);
	if (!p)
		return -ENODEV;

	expires = pm_runtime_autosuspend_expiration(&(p->dev));
	if (!expires)
		/*
		 * If we cannot get the audio device autosuspend delay,
		 * a fixed 4S interval will be used. Considering 3S is
		 * the audio controller default autosuspend delay setting.
		 * 4S used here is guaranteed to cover that.
		 */
		expires = ktime_get_mono_fast_ns() + NSEC_PER_SEC * 4ULL;

	/* keep retrying the suspend until it succeeds or the deadline passes */
	while (!pm_runtime_status_suspended(&(p->dev))) {
		if (!pm_runtime_suspend(&(p->dev)))
			break;

		if (expires < ktime_get_mono_fast_ns()) {
			dev_warn(adev->dev, "failed to suspend display audio\n");
			pci_dev_put(p);
			/* TODO: abort the succeeding gpu reset? */
			return -ETIMEDOUT;
		}
	}

	pm_runtime_disable(&(p->dev));

	/* drop the reference taken by pci_get_domain_bus_and_slot() */
	pci_dev_put(p);
	return 0;
#endif
}
5770 
/**
 * amdgpu_device_stop_pending_resets - cancel queued reset work items
 *
 * @adev: amdgpu_device pointer
 *
 * Calls cancel_work() on each reset-related work item that applies to this
 * device: adev->reset_work (CONFIG_DEBUG_FS builds, not on an SR-IOV VF),
 * the KFD reset work (when a KFD device exists), the FLR work (SR-IOV VF
 * only) and the RAS recovery work (when a RAS context exists and RAS is
 * enabled).
 */
static inline void amdgpu_device_stop_pending_resets(struct amdgpu_device *adev)
{
	struct amdgpu_ras *con = amdgpu_ras_get_context(adev);

#if defined(CONFIG_DEBUG_FS)
	if (!amdgpu_sriov_vf(adev))
		cancel_work(&adev->reset_work);
#endif

	if (adev->kfd.dev)
		cancel_work(&adev->kfd.reset_work);

	if (amdgpu_sriov_vf(adev))
		cancel_work(&adev->virt.flr_work);

	if (con && adev->ras_enabled)
		cancel_work(&con->recovery_work);

}
5790 
/**
 * amdgpu_device_health_check - check that the listed devices are still on the bus
 *
 * @device_list_handle: list of devices linked through their reset_list member
 *
 * The check itself (reading PCI_COMMAND from each device's config space and
 * testing the value with PCI_POSSIBLE_ERROR()) is compiled only when notyet
 * is defined; without it the function always returns 0.
 *
 * Returns 0, or -ENODEV if at least one device failed the check.
 */
static int amdgpu_device_health_check(struct list_head *device_list_handle)
{
	struct amdgpu_device *tmp_adev;
	int ret = 0;
	u32 status;

#ifdef notyet
	/* every device is checked; one failure does not stop the walk */
	list_for_each_entry(tmp_adev, device_list_handle, reset_list) {
		pci_read_config_dword(tmp_adev->pdev, PCI_COMMAND, &status);
		if (PCI_POSSIBLE_ERROR(status)) {
			dev_err(tmp_adev->dev, "device lost from bus!");
			ret = -ENODEV;
		}
	}
#endif

	return ret;
}
5809 
/**
 * amdgpu_device_gpu_recover - reset the asic and recover scheduler
 *
 * @adev: amdgpu_device pointer
 * @job: the job that triggered the hang, may be NULL
 * @reset_context: amdgpu reset context pointer; its job and hive members are
 *                 overwritten here
 *
 * Attempt to reset the GPU if it has hung (all asics).
 * Attempt to do soft-reset or full-reset and reinitialize Asic.
 * When @adev is part of an XGMI hive (and not an SR-IOV VF) every device of
 * the hive is processed, with @adev first.
 * The result is also stored in adev->reset_domain->reset_res.
 * Returns 0 for success or an error on failure.
 */

int amdgpu_device_gpu_recover(struct amdgpu_device *adev,
			      struct amdgpu_job *job,
			      struct amdgpu_reset_context *reset_context)
{
	struct list_head device_list, *device_list_handle =  NULL;
	bool job_signaled = false;
	struct amdgpu_hive_info *hive = NULL;
	struct amdgpu_device *tmp_adev = NULL;
	int i, r = 0;
	bool need_emergency_restart = false;
	bool audio_suspended = false;
	int retry_limit = AMDGPU_MAX_RETRY_LIMIT;

	/*
	 * Special case: RAS triggered and full reset isn't supported
	 */
	need_emergency_restart = amdgpu_ras_need_emergency_restart(adev);

	/*
	 * Flush RAM to disk so that after reboot
	 * the user can read log and see why the system rebooted.
	 */
	if (need_emergency_restart && amdgpu_ras_get_context(adev) &&
		amdgpu_ras_get_context(adev)->reboot) {
		DRM_WARN("Emergency reboot.");

#ifdef notyet
		ksys_sync_helper();
		emergency_restart();
#else
		/* without notyet this path panics instead of sync + restart */
		panic("emergency_restart");
#endif
	}

	dev_info(adev->dev, "GPU %s begin!\n",
		need_emergency_restart ? "jobs stop":"reset");

	/* the hive reference and hive_lock are released at end_reset */
	if (!amdgpu_sriov_vf(adev))
		hive = amdgpu_get_xgmi_hive(adev);
	if (hive)
		mutex_lock(&hive->hive_lock);

	reset_context->job = job;
	reset_context->hive = hive;
	/*
	 * Build list of devices to reset.
	 * In case we are in XGMI hive mode, resort the device list
	 * to put adev in the 1st position.
	 */
	INIT_LIST_HEAD(&device_list);
	if (!amdgpu_sriov_vf(adev) && (adev->gmc.xgmi.num_physical_nodes > 1) && hive) {
		list_for_each_entry(tmp_adev, &hive->device_list, gmc.xgmi.head) {
			list_add_tail(&tmp_adev->reset_list, &device_list);
			/* propagate the shutdown flag to every hive member */
			if (adev->shutdown)
				tmp_adev->shutdown = true;
		}
		if (!list_is_first(&adev->reset_list, &device_list))
			list_rotate_to_front(&adev->reset_list, &device_list);
		device_list_handle = &device_list;
	} else {
		list_add_tail(&adev->reset_list, &device_list);
		device_list_handle = &device_list;
	}

	/* bail out before taking the reset domain lock if the check fails */
	if (!amdgpu_sriov_vf(adev)) {
		r = amdgpu_device_health_check(device_list_handle);
		if (r)
			goto end_reset;
	}

	/* We need to lock reset domain only once both for XGMI and single device */
	tmp_adev = list_first_entry(device_list_handle, struct amdgpu_device,
				    reset_list);
	amdgpu_device_lock_reset_domain(tmp_adev->reset_domain);

	/* block all schedulers and reset given job's ring */
	list_for_each_entry(tmp_adev, device_list_handle, reset_list) {

		amdgpu_device_set_mp1_state(tmp_adev);

		/*
		 * Try to put the audio codec into suspend state
		 * before gpu reset started.
		 *
		 * Due to the power domain of the graphics device
		 * is shared with AZ power domain. Without this,
		 * we may change the audio hardware from behind
		 * the audio driver's back. That will trigger
		 * some audio codec errors.
		 */
		if (!amdgpu_device_suspend_display_audio(tmp_adev))
			audio_suspended = true;

		amdgpu_ras_set_error_query_ready(tmp_adev, false);

		cancel_delayed_work_sync(&tmp_adev->delayed_init_work);

		amdgpu_amdkfd_pre_reset(tmp_adev, reset_context);

		/*
		 * Mark these ASICs to be reset as untracked first
		 * And add them back after reset completed
		 */
		amdgpu_unregister_gpu_instance(tmp_adev);

		drm_fb_helper_set_suspend_unlocked(adev_to_drm(tmp_adev)->fb_helper, true);

		/* disable ras on ALL IPs */
		if (!need_emergency_restart &&
		      amdgpu_device_ip_need_full_reset(tmp_adev))
			amdgpu_ras_suspend(tmp_adev);

		for (i = 0; i < AMDGPU_MAX_RINGS; ++i) {
			struct amdgpu_ring *ring = tmp_adev->rings[i];

			if (!amdgpu_ring_sched_ready(ring))
				continue;

			drm_sched_stop(&ring->sched, job ? &job->base : NULL);

			if (need_emergency_restart)
				amdgpu_job_stop_all_jobs_on_sched(&ring->sched);
		}
		atomic_inc(&tmp_adev->gpu_reset_counter);
	}

	/* emergency restart: jobs were stopped above, skip the reset itself */
	if (need_emergency_restart)
		goto skip_sched_resume;

	/*
	 * Must check guilty signal here since after this point all old
	 * HW fences are force signaled.
	 *
	 * job->base holds a reference to parent fence
	 */
	if (job && dma_fence_is_signaled(&job->hw_fence)) {
		job_signaled = true;
		dev_info(adev->dev, "Guilty job already signaled, skipping HW reset");
		goto skip_hw_reset;
	}

retry:	/* Rest of adevs pre asic reset from XGMI hive. */
	list_for_each_entry(tmp_adev, device_list_handle, reset_list) {
		r = amdgpu_device_pre_asic_reset(tmp_adev, reset_context);
		/*TODO Should we stop ?*/
		if (r) {
			dev_err(tmp_adev->dev, "GPU pre asic reset failed with err, %d for drm dev, %s ",
				  r, adev_to_drm(tmp_adev)->unique);
			tmp_adev->asic_reset_res = r;
		}
	}

	/* Actual ASIC resets if needed.*/
	/* Host driver will handle XGMI hive reset for SRIOV */
	if (amdgpu_sriov_vf(adev)) {
		if (amdgpu_ras_get_fed_status(adev) || amdgpu_virt_rcvd_ras_interrupt(adev)) {
			dev_dbg(adev->dev, "Detected RAS error, wait for FLR completion\n");
			amdgpu_ras_set_fed(adev, true);
			set_bit(AMDGPU_HOST_FLR, &reset_context->flags);
		}

		r = amdgpu_device_reset_sriov(adev, reset_context);
		/* SR-IOV retries are bounded by retry_limit */
		if (AMDGPU_RETRY_SRIOV_RESET(r) && (retry_limit--) > 0) {
			amdgpu_virt_release_full_gpu(adev, true);
			goto retry;
		}
		if (r)
			adev->asic_reset_res = r;
	} else {
		r = amdgpu_do_asic_reset(device_list_handle, reset_context);
		/* -EAGAIN restarts from the pre-reset step; no retry counter here */
		if (r && r == -EAGAIN)
			goto retry;
	}

	list_for_each_entry(tmp_adev, device_list_handle, reset_list) {
		/*
		 * Drop any pending non scheduler resets queued before reset is done.
		 * Any reset scheduled after this point would be valid. Scheduler resets
		 * were already dropped during drm_sched_stop and no new ones can come
		 * in before drm_sched_start.
		 */
		amdgpu_device_stop_pending_resets(tmp_adev);
	}

skip_hw_reset:

	/* Post ASIC reset for all devs .*/
	list_for_each_entry(tmp_adev, device_list_handle, reset_list) {

		for (i = 0; i < AMDGPU_MAX_RINGS; ++i) {
			struct amdgpu_ring *ring = tmp_adev->rings[i];

			if (!amdgpu_ring_sched_ready(ring))
				continue;

			drm_sched_start(&ring->sched);
		}

		if (!drm_drv_uses_atomic_modeset(adev_to_drm(tmp_adev)) && !job_signaled)
			drm_helper_resume_force_mode(adev_to_drm(tmp_adev));

		/* a per-device result overrides r, then the slot is cleared */
		if (tmp_adev->asic_reset_res)
			r = tmp_adev->asic_reset_res;

		tmp_adev->asic_reset_res = 0;

		if (r) {
			/* bad news, how to tell it to userspace ?
			 * for ras error, we should report GPU bad status instead of
			 * reset failure
			 */
			if (reset_context->src != AMDGPU_RESET_SRC_RAS ||
			    !amdgpu_ras_eeprom_check_err_threshold(tmp_adev))
				dev_info(tmp_adev->dev, "GPU reset(%d) failed\n",
					atomic_read(&tmp_adev->gpu_reset_counter));
			amdgpu_vf_error_put(tmp_adev, AMDGIM_ERROR_VF_GPU_RESET_FAIL, 0, r);
		} else {
			dev_info(tmp_adev->dev, "GPU reset(%d) succeeded!\n", atomic_read(&tmp_adev->gpu_reset_counter));
			if (amdgpu_acpi_smart_shift_update(adev_to_drm(tmp_adev), AMDGPU_SS_DEV_D0))
				DRM_WARN("smart shift update failed\n");
		}
	}

skip_sched_resume:
	list_for_each_entry(tmp_adev, device_list_handle, reset_list) {
		/* unlock kfd: SRIOV would do it separately */
		if (!need_emergency_restart && !amdgpu_sriov_vf(tmp_adev))
			amdgpu_amdkfd_post_reset(tmp_adev);

		/* kfd_post_reset will do nothing if kfd device is not initialized,
		 * need to bring up kfd here if it's not be initialized before
		 */
		/* NOTE(review): this tests and inits adev, not tmp_adev - confirm intended */
		if (!adev->kfd.init_complete)
			amdgpu_amdkfd_device_init(adev);

		if (audio_suspended)
			amdgpu_device_resume_display_audio(tmp_adev);

		amdgpu_device_unset_mp1_state(tmp_adev);

		amdgpu_ras_set_error_query_ready(tmp_adev, true);
	}

	/* unlock through the first list entry, the same one used for locking */
	tmp_adev = list_first_entry(device_list_handle, struct amdgpu_device,
					    reset_list);
	amdgpu_device_unlock_reset_domain(tmp_adev->reset_domain);

end_reset:
	if (hive) {
		mutex_unlock(&hive->hive_lock);
		amdgpu_put_xgmi_hive(hive);
	}

	if (r)
		dev_info(adev->dev, "GPU reset end with ret = %d\n", r);

	atomic_set(&adev->reset_domain->reset_res, r);
	return r;
}
6081 
6082 /**
6083  * amdgpu_device_partner_bandwidth - find the bandwidth of appropriate partner
6084  *
6085  * @adev: amdgpu_device pointer
6086  * @speed: pointer to the speed of the link
6087  * @width: pointer to the width of the link
6088  *
6089  * Evaluate the hierarchy to find the speed and bandwidth capabilities of the
6090  * first physical partner to an AMD dGPU.
6091  * This will exclude any virtual switches and links.
6092  */
amdgpu_device_partner_bandwidth(struct amdgpu_device * adev,enum pci_bus_speed * speed,enum pcie_link_width * width)6093 static void amdgpu_device_partner_bandwidth(struct amdgpu_device *adev,
6094 					    enum pci_bus_speed *speed,
6095 					    enum pcie_link_width *width)
6096 {
6097 	struct pci_dev *parent = adev->pdev;
6098 
6099 	if (!speed || !width)
6100 		return;
6101 
6102 	*speed = PCI_SPEED_UNKNOWN;
6103 	*width = PCIE_LNK_WIDTH_UNKNOWN;
6104 
6105 	if (amdgpu_device_pcie_dynamic_switching_supported(adev)) {
6106 		while ((parent = pci_upstream_bridge(parent))) {
6107 			/* skip upstream/downstream switches internal to dGPU*/
6108 			if (parent->vendor == PCI_VENDOR_ID_ATI)
6109 				continue;
6110 			*speed = pcie_get_speed_cap(parent);
6111 			*width = pcie_get_width_cap(parent);
6112 			break;
6113 		}
6114 	} else {
6115 		/* use the current speeds rather than max if switching is not supported */
6116 		pcie_bandwidth_available(adev->pdev, NULL, speed, width);
6117 	}
6118 }
6119 
6120 /**
6121  * amdgpu_device_get_pcie_info - fence pcie info about the PCIE slot
6122  *
6123  * @adev: amdgpu_device pointer
6124  *
6125  * Fetchs and stores in the driver the PCIE capabilities (gen speed
6126  * and lanes) of the slot the device is in. Handles APUs and
6127  * virtualized environments where PCIE config space may not be available.
6128  */
amdgpu_device_get_pcie_info(struct amdgpu_device * adev)6129 static void amdgpu_device_get_pcie_info(struct amdgpu_device *adev)
6130 {
6131 	struct pci_dev *pdev;
6132 	enum pci_bus_speed speed_cap, platform_speed_cap;
6133 	enum pcie_link_width platform_link_width;
6134 
6135 	if (amdgpu_pcie_gen_cap)
6136 		adev->pm.pcie_gen_mask = amdgpu_pcie_gen_cap;
6137 
6138 	if (amdgpu_pcie_lane_cap)
6139 		adev->pm.pcie_mlw_mask = amdgpu_pcie_lane_cap;
6140 
6141 	/* covers APUs as well */
6142 	if (pci_is_root_bus(adev->pdev->bus) && !amdgpu_passthrough(adev)) {
6143 		if (adev->pm.pcie_gen_mask == 0)
6144 			adev->pm.pcie_gen_mask = AMDGPU_DEFAULT_PCIE_GEN_MASK;
6145 		if (adev->pm.pcie_mlw_mask == 0)
6146 			adev->pm.pcie_mlw_mask = AMDGPU_DEFAULT_PCIE_MLW_MASK;
6147 		return;
6148 	}
6149 
6150 	if (adev->pm.pcie_gen_mask && adev->pm.pcie_mlw_mask)
6151 		return;
6152 
6153 	amdgpu_device_partner_bandwidth(adev, &platform_speed_cap,
6154 					&platform_link_width);
6155 
6156 	if (adev->pm.pcie_gen_mask == 0) {
6157 		/* asic caps */
6158 		pdev = adev->pdev;
6159 		speed_cap = pcie_get_speed_cap(pdev);
6160 		if (speed_cap == PCI_SPEED_UNKNOWN) {
6161 			adev->pm.pcie_gen_mask |= (CAIL_ASIC_PCIE_LINK_SPEED_SUPPORT_GEN1 |
6162 						  CAIL_ASIC_PCIE_LINK_SPEED_SUPPORT_GEN2 |
6163 						  CAIL_ASIC_PCIE_LINK_SPEED_SUPPORT_GEN3);
6164 		} else {
6165 			if (speed_cap == PCIE_SPEED_32_0GT)
6166 				adev->pm.pcie_gen_mask |= (CAIL_ASIC_PCIE_LINK_SPEED_SUPPORT_GEN1 |
6167 							  CAIL_ASIC_PCIE_LINK_SPEED_SUPPORT_GEN2 |
6168 							  CAIL_ASIC_PCIE_LINK_SPEED_SUPPORT_GEN3 |
6169 							  CAIL_ASIC_PCIE_LINK_SPEED_SUPPORT_GEN4 |
6170 							  CAIL_ASIC_PCIE_LINK_SPEED_SUPPORT_GEN5);
6171 			else if (speed_cap == PCIE_SPEED_16_0GT)
6172 				adev->pm.pcie_gen_mask |= (CAIL_ASIC_PCIE_LINK_SPEED_SUPPORT_GEN1 |
6173 							  CAIL_ASIC_PCIE_LINK_SPEED_SUPPORT_GEN2 |
6174 							  CAIL_ASIC_PCIE_LINK_SPEED_SUPPORT_GEN3 |
6175 							  CAIL_ASIC_PCIE_LINK_SPEED_SUPPORT_GEN4);
6176 			else if (speed_cap == PCIE_SPEED_8_0GT)
6177 				adev->pm.pcie_gen_mask |= (CAIL_ASIC_PCIE_LINK_SPEED_SUPPORT_GEN1 |
6178 							  CAIL_ASIC_PCIE_LINK_SPEED_SUPPORT_GEN2 |
6179 							  CAIL_ASIC_PCIE_LINK_SPEED_SUPPORT_GEN3);
6180 			else if (speed_cap == PCIE_SPEED_5_0GT)
6181 				adev->pm.pcie_gen_mask |= (CAIL_ASIC_PCIE_LINK_SPEED_SUPPORT_GEN1 |
6182 							  CAIL_ASIC_PCIE_LINK_SPEED_SUPPORT_GEN2);
6183 			else
6184 				adev->pm.pcie_gen_mask |= CAIL_ASIC_PCIE_LINK_SPEED_SUPPORT_GEN1;
6185 		}
6186 		/* platform caps */
6187 		if (platform_speed_cap == PCI_SPEED_UNKNOWN) {
6188 			adev->pm.pcie_gen_mask |= (CAIL_PCIE_LINK_SPEED_SUPPORT_GEN1 |
6189 						   CAIL_PCIE_LINK_SPEED_SUPPORT_GEN2);
6190 		} else {
6191 			if (platform_speed_cap == PCIE_SPEED_32_0GT)
6192 				adev->pm.pcie_gen_mask |= (CAIL_PCIE_LINK_SPEED_SUPPORT_GEN1 |
6193 							   CAIL_PCIE_LINK_SPEED_SUPPORT_GEN2 |
6194 							   CAIL_PCIE_LINK_SPEED_SUPPORT_GEN3 |
6195 							   CAIL_PCIE_LINK_SPEED_SUPPORT_GEN4 |
6196 							   CAIL_PCIE_LINK_SPEED_SUPPORT_GEN5);
6197 			else if (platform_speed_cap == PCIE_SPEED_16_0GT)
6198 				adev->pm.pcie_gen_mask |= (CAIL_PCIE_LINK_SPEED_SUPPORT_GEN1 |
6199 							   CAIL_PCIE_LINK_SPEED_SUPPORT_GEN2 |
6200 							   CAIL_PCIE_LINK_SPEED_SUPPORT_GEN3 |
6201 							   CAIL_PCIE_LINK_SPEED_SUPPORT_GEN4);
6202 			else if (platform_speed_cap == PCIE_SPEED_8_0GT)
6203 				adev->pm.pcie_gen_mask |= (CAIL_PCIE_LINK_SPEED_SUPPORT_GEN1 |
6204 							   CAIL_PCIE_LINK_SPEED_SUPPORT_GEN2 |
6205 							   CAIL_PCIE_LINK_SPEED_SUPPORT_GEN3);
6206 			else if (platform_speed_cap == PCIE_SPEED_5_0GT)
6207 				adev->pm.pcie_gen_mask |= (CAIL_PCIE_LINK_SPEED_SUPPORT_GEN1 |
6208 							   CAIL_PCIE_LINK_SPEED_SUPPORT_GEN2);
6209 			else
6210 				adev->pm.pcie_gen_mask |= CAIL_PCIE_LINK_SPEED_SUPPORT_GEN1;
6211 
6212 		}
6213 	}
6214 	if (adev->pm.pcie_mlw_mask == 0) {
6215 		if (platform_link_width == PCIE_LNK_WIDTH_UNKNOWN) {
6216 			adev->pm.pcie_mlw_mask |= AMDGPU_DEFAULT_PCIE_MLW_MASK;
6217 		} else {
6218 			switch (platform_link_width) {
6219 			case PCIE_LNK_X32:
6220 				adev->pm.pcie_mlw_mask = (CAIL_PCIE_LINK_WIDTH_SUPPORT_X32 |
6221 							  CAIL_PCIE_LINK_WIDTH_SUPPORT_X16 |
6222 							  CAIL_PCIE_LINK_WIDTH_SUPPORT_X12 |
6223 							  CAIL_PCIE_LINK_WIDTH_SUPPORT_X8 |
6224 							  CAIL_PCIE_LINK_WIDTH_SUPPORT_X4 |
6225 							  CAIL_PCIE_LINK_WIDTH_SUPPORT_X2 |
6226 							  CAIL_PCIE_LINK_WIDTH_SUPPORT_X1);
6227 				break;
6228 			case PCIE_LNK_X16:
6229 				adev->pm.pcie_mlw_mask = (CAIL_PCIE_LINK_WIDTH_SUPPORT_X16 |
6230 							  CAIL_PCIE_LINK_WIDTH_SUPPORT_X12 |
6231 							  CAIL_PCIE_LINK_WIDTH_SUPPORT_X8 |
6232 							  CAIL_PCIE_LINK_WIDTH_SUPPORT_X4 |
6233 							  CAIL_PCIE_LINK_WIDTH_SUPPORT_X2 |
6234 							  CAIL_PCIE_LINK_WIDTH_SUPPORT_X1);
6235 				break;
6236 			case PCIE_LNK_X12:
6237 				adev->pm.pcie_mlw_mask = (CAIL_PCIE_LINK_WIDTH_SUPPORT_X12 |
6238 							  CAIL_PCIE_LINK_WIDTH_SUPPORT_X8 |
6239 							  CAIL_PCIE_LINK_WIDTH_SUPPORT_X4 |
6240 							  CAIL_PCIE_LINK_WIDTH_SUPPORT_X2 |
6241 							  CAIL_PCIE_LINK_WIDTH_SUPPORT_X1);
6242 				break;
6243 			case PCIE_LNK_X8:
6244 				adev->pm.pcie_mlw_mask = (CAIL_PCIE_LINK_WIDTH_SUPPORT_X8 |
6245 							  CAIL_PCIE_LINK_WIDTH_SUPPORT_X4 |
6246 							  CAIL_PCIE_LINK_WIDTH_SUPPORT_X2 |
6247 							  CAIL_PCIE_LINK_WIDTH_SUPPORT_X1);
6248 				break;
6249 			case PCIE_LNK_X4:
6250 				adev->pm.pcie_mlw_mask = (CAIL_PCIE_LINK_WIDTH_SUPPORT_X4 |
6251 							  CAIL_PCIE_LINK_WIDTH_SUPPORT_X2 |
6252 							  CAIL_PCIE_LINK_WIDTH_SUPPORT_X1);
6253 				break;
6254 			case PCIE_LNK_X2:
6255 				adev->pm.pcie_mlw_mask = (CAIL_PCIE_LINK_WIDTH_SUPPORT_X2 |
6256 							  CAIL_PCIE_LINK_WIDTH_SUPPORT_X1);
6257 				break;
6258 			case PCIE_LNK_X1:
6259 				adev->pm.pcie_mlw_mask = CAIL_PCIE_LINK_WIDTH_SUPPORT_X1;
6260 				break;
6261 			default:
6262 				break;
6263 			}
6264 		}
6265 	}
6266 }
6267 
6268 /**
6269  * amdgpu_device_is_peer_accessible - Check peer access through PCIe BAR
6270  *
6271  * @adev: amdgpu_device pointer
6272  * @peer_adev: amdgpu_device pointer for peer device trying to access @adev
6273  *
6274  * Return true if @peer_adev can access (DMA) @adev through the PCIe
6275  * BAR, i.e. @adev is "large BAR" and the BAR matches the DMA mask of
6276  * @peer_adev.
6277  */
amdgpu_device_is_peer_accessible(struct amdgpu_device * adev,struct amdgpu_device * peer_adev)6278 bool amdgpu_device_is_peer_accessible(struct amdgpu_device *adev,
6279 				      struct amdgpu_device *peer_adev)
6280 {
6281 #ifdef CONFIG_HSA_AMD_P2P
6282 	bool p2p_access =
6283 		!adev->gmc.xgmi.connected_to_cpu &&
6284 		!(pci_p2pdma_distance(adev->pdev, peer_adev->dev, false) < 0);
6285 
6286 	bool is_large_bar = adev->gmc.visible_vram_size &&
6287 		adev->gmc.real_vram_size == adev->gmc.visible_vram_size;
6288 	bool p2p_addressable = amdgpu_device_check_iommu_remap(peer_adev);
6289 
6290 	if (!p2p_addressable) {
6291 		uint64_t address_mask = peer_adev->dev->dma_mask ?
6292 			~*peer_adev->dev->dma_mask : ~((1ULL << 32) - 1);
6293 		resource_size_t aper_limit =
6294 			adev->gmc.aper_base + adev->gmc.aper_size - 1;
6295 
6296 		p2p_addressable = !(adev->gmc.aper_base & address_mask ||
6297 				     aper_limit & address_mask);
6298 	}
6299 	return pcie_p2p && is_large_bar && p2p_access && p2p_addressable;
6300 #else
6301 	return false;
6302 #endif
6303 }
6304 
amdgpu_device_baco_enter(struct drm_device * dev)6305 int amdgpu_device_baco_enter(struct drm_device *dev)
6306 {
6307 	struct amdgpu_device *adev = drm_to_adev(dev);
6308 	struct amdgpu_ras *ras = amdgpu_ras_get_context(adev);
6309 
6310 	if (!amdgpu_device_supports_baco(dev))
6311 		return -ENOTSUPP;
6312 
6313 	if (ras && adev->ras_enabled &&
6314 	    adev->nbio.funcs->enable_doorbell_interrupt)
6315 		adev->nbio.funcs->enable_doorbell_interrupt(adev, false);
6316 
6317 	return amdgpu_dpm_baco_enter(adev);
6318 }
6319 
amdgpu_device_baco_exit(struct drm_device * dev)6320 int amdgpu_device_baco_exit(struct drm_device *dev)
6321 {
6322 	struct amdgpu_device *adev = drm_to_adev(dev);
6323 	struct amdgpu_ras *ras = amdgpu_ras_get_context(adev);
6324 	int ret = 0;
6325 
6326 	if (!amdgpu_device_supports_baco(dev))
6327 		return -ENOTSUPP;
6328 
6329 	ret = amdgpu_dpm_baco_exit(adev);
6330 	if (ret)
6331 		return ret;
6332 
6333 	if (ras && adev->ras_enabled &&
6334 	    adev->nbio.funcs->enable_doorbell_interrupt)
6335 		adev->nbio.funcs->enable_doorbell_interrupt(adev, true);
6336 
6337 	if (amdgpu_passthrough(adev) && adev->nbio.funcs &&
6338 	    adev->nbio.funcs->clear_doorbell_interrupt)
6339 		adev->nbio.funcs->clear_doorbell_interrupt(adev);
6340 
6341 	return 0;
6342 }
6343 
/**
 * amdgpu_pci_error_detected - Called when a PCI error is detected.
 * @pdev: PCI device struct
 * @state: PCI channel state
 *
 * Description: Called when a PCI error is detected.
 *
 * Stubbed on this platform: STUB() is followed by an unconditional
 * "return 0", so the code guarded by notyet is never reached.
 *
 * The guarded code returns DISCONNECT for XGMI hives and for a permanent
 * failure, CAN_RECOVER for a normal channel, and for a frozen channel locks
 * the reset domain, sets the MP1 state and stops all ready schedulers before
 * returning NEED_RESET.
 *
 * Return: 0 as compiled here; PCI_ERS_RESULT_CAN_RECOVER,
 * PCI_ERS_RESULT_NEED_RESET or PCI_ERS_RESULT_DISCONNECT from the guarded
 * code.
 */
pci_ers_result_t amdgpu_pci_error_detected(struct pci_dev *pdev, pci_channel_state_t state)
{
	STUB();
	return 0;
#ifdef notyet
	struct drm_device *dev = pci_get_drvdata(pdev);
	struct amdgpu_device *adev = drm_to_adev(dev);
	int i;

	DRM_INFO("PCI error: detected callback, state(%d)!!\n", state);

	if (adev->gmc.xgmi.num_physical_nodes > 1) {
		DRM_WARN("No support for XGMI hive yet...");
		return PCI_ERS_RESULT_DISCONNECT;
	}

	/* remembered so amdgpu_pci_resume() can tell which case it handles */
	adev->pci_channel_state = state;

	switch (state) {
	case pci_channel_io_normal:
		return PCI_ERS_RESULT_CAN_RECOVER;
	/* Fatal error, prepare for slot reset */
	case pci_channel_io_frozen:
		/*
		 * Locking adev->reset_domain->sem will prevent any external access
		 * to GPU during PCI error recovery
		 */
		amdgpu_device_lock_reset_domain(adev->reset_domain);
		amdgpu_device_set_mp1_state(adev);

		/*
		 * Block any work scheduling as we do for regular GPU reset
		 * for the duration of the recovery
		 */
		for (i = 0; i < AMDGPU_MAX_RINGS; ++i) {
			struct amdgpu_ring *ring = adev->rings[i];

			if (!amdgpu_ring_sched_ready(ring))
				continue;

			drm_sched_stop(&ring->sched, NULL);
		}
		atomic_inc(&adev->gpu_reset_counter);
		return PCI_ERS_RESULT_NEED_RESET;
	case pci_channel_io_perm_failure:
		/* Permanent error, prepare for device removal */
		return PCI_ERS_RESULT_DISCONNECT;
	}

	return PCI_ERS_RESULT_NEED_RESET;
#endif
}
6405 
/**
 * amdgpu_pci_mmio_enabled - Enable MMIO and dump debug registers
 * @pdev: pointer to PCI device (unused)
 *
 * Currently only logs that the callback ran; no registers are dumped yet.
 *
 * Return: always PCI_ERS_RESULT_RECOVERED.
 */
pci_ers_result_t amdgpu_pci_mmio_enabled(struct pci_dev *pdev)
{

	DRM_INFO("PCI error: mmio enabled callback!!\n");

	/* TODO - dump whatever for debugging purposes */

	/* This called only if amdgpu_pci_error_detected returns
	 * PCI_ERS_RESULT_CAN_RECOVER. Read/write to the device still
	 * works, no need to reset slot.
	 */

	return PCI_ERS_RESULT_RECOVERED;
}
6424 
/**
 * amdgpu_pci_slot_reset - Called when PCI slot has been reset.
 * @pdev: PCI device struct
 *
 * Description: This routine is called by the pci error recovery
 * code after the PCI slot has been reset, just before we
 * should resume normal operations.
 *
 * Stubbed on this platform: STUB() is followed by an unconditional
 * "return PCI_ERS_RESULT_RECOVERED", so the code guarded by notyet is
 * never reached.
 *
 * The guarded code restores PCI config space, polls until the ASIC reports
 * a valid memory size, then runs the pre-reset and reset steps with
 * AMDGPU_SKIP_HW_RESET set. On failure it clears the MP1 state and unlocks
 * the reset domain itself.
 *
 * Return: PCI_ERS_RESULT_RECOVERED, or PCI_ERS_RESULT_DISCONNECT when the
 * guarded recovery fails.
 */
pci_ers_result_t amdgpu_pci_slot_reset(struct pci_dev *pdev)
{
	STUB();
	return PCI_ERS_RESULT_RECOVERED;
#ifdef notyet
	struct drm_device *dev = pci_get_drvdata(pdev);
	struct amdgpu_device *adev = drm_to_adev(dev);
	int r, i;
	struct amdgpu_reset_context reset_context;
	u32 memsize;
	struct list_head device_list;

	/* PCI error slot reset should be skipped During RAS recovery */
	if ((amdgpu_ip_version(adev, GC_HWIP, 0) == IP_VERSION(9, 4, 3) ||
	    amdgpu_ip_version(adev, GC_HWIP, 0) == IP_VERSION(9, 4, 4)) &&
	    amdgpu_ras_in_recovery(adev))
		return PCI_ERS_RESULT_RECOVERED;

	DRM_INFO("PCI error: slot reset callback!!\n");

	memset(&reset_context, 0, sizeof(reset_context));

	/* single-entry list: amdgpu_do_asic_reset() takes a device list */
	INIT_LIST_HEAD(&device_list);
	list_add_tail(&adev->reset_list, &device_list);

	/* wait for asic to come out of reset */
	drm_msleep(500);

	/* Restore PCI confspace */
	amdgpu_device_load_pci_state(pdev);

	/* confirm  ASIC came out of reset */
	for (i = 0; i < adev->usec_timeout; i++) {
		memsize = amdgpu_asic_get_config_memsize(adev);

		/* all-ones means the read did not return a real value yet */
		if (memsize != 0xffffffff)
			break;
		udelay(1);
	}
	if (memsize == 0xffffffff) {
		r = -ETIME;
		goto out;
	}

	reset_context.method = AMD_RESET_METHOD_NONE;
	reset_context.reset_req_dev = adev;
	set_bit(AMDGPU_NEED_FULL_RESET, &reset_context.flags);
	set_bit(AMDGPU_SKIP_HW_RESET, &reset_context.flags);

	/* no_hw_access is set only around the pre-reset step */
	adev->no_hw_access = true;
	r = amdgpu_device_pre_asic_reset(adev, &reset_context);
	adev->no_hw_access = false;
	if (r)
		goto out;

	r = amdgpu_do_asic_reset(&device_list, &reset_context);

out:
	if (!r) {
		if (amdgpu_device_cache_pci_state(adev->pdev))
			pci_restore_state(adev->pdev);

		DRM_INFO("PCIe error recovery succeeded\n");
	} else {
		DRM_ERROR("PCIe error recovery failed, err:%d", r);
		amdgpu_device_unset_mp1_state(adev);
		amdgpu_device_unlock_reset_domain(adev->reset_domain);
	}

	return r ? PCI_ERS_RESULT_DISCONNECT : PCI_ERS_RESULT_RECOVERED;
#endif
}
6505 
/**
 * amdgpu_pci_resume() - resume normal ops after PCI reset
 * @pdev: pointer to PCI device
 *
 * Called when the error recovery driver tells us that it is
 * OK to resume normal operation.
 *
 * Stubbed on this platform: only STUB() runs unless notyet is defined.
 * The guarded code acts only when the recorded channel state is
 * pci_channel_io_frozen: it restarts every ready scheduler, clears the MP1
 * state and unlocks the reset domain.
 */
void amdgpu_pci_resume(struct pci_dev *pdev)
{
	STUB();
#ifdef notyet
	struct drm_device *dev = pci_get_drvdata(pdev);
	struct amdgpu_device *adev = drm_to_adev(dev);
	int i;


	DRM_INFO("PCI error: resume callback!!\n");

	/* Only continue execution for the case of pci_channel_io_frozen */
	if (adev->pci_channel_state != pci_channel_io_frozen)
		return;

	for (i = 0; i < AMDGPU_MAX_RINGS; ++i) {
		struct amdgpu_ring *ring = adev->rings[i];

		if (!amdgpu_ring_sched_ready(ring))
			continue;

		drm_sched_start(&ring->sched);
	}

	amdgpu_device_unset_mp1_state(adev);
	amdgpu_device_unlock_reset_domain(adev->reset_domain);
#endif
}
6541 
/**
 * amdgpu_device_cache_pci_state - save the PCI config state in the device
 * @pdev: PCI device struct
 *
 * Disabled on this platform: the function returns false unconditionally and
 * the code guarded by notyet is never reached.
 *
 * The guarded code saves the PCI state and replaces adev->pci_state with a
 * freshly stored copy; it does nothing for SR-IOV VFs.
 *
 * Return: true if the state was cached, false otherwise.
 */
bool amdgpu_device_cache_pci_state(struct pci_dev *pdev)
{
	return false;
#ifdef notyet
	struct drm_device *dev = pci_get_drvdata(pdev);
	struct amdgpu_device *adev = drm_to_adev(dev);
	int r;

	if (amdgpu_sriov_vf(adev))
		return false;

	r = pci_save_state(pdev);
	if (!r) {
		/* free the previously cached copy before storing the new one */
		kfree(adev->pci_state);

		adev->pci_state = pci_store_saved_state(pdev);

		if (!adev->pci_state) {
			DRM_ERROR("Failed to store PCI saved state");
			return false;
		}
	} else {
		DRM_WARN("Failed to save PCI state, err:%d\n", r);
		return false;
	}

	return true;
#endif
}
6571 
/**
 * amdgpu_device_load_pci_state - restore the cached PCI config state
 * @pdev: PCI device struct
 *
 * Stubbed on this platform: STUB() is followed by an unconditional
 * "return false", so the code guarded by notyet is never reached.
 *
 * The guarded code loads adev->pci_state into @pdev and restores it; it
 * fails when nothing has been cached or the load fails.
 *
 * Return: true if the state was restored, false otherwise.
 */
bool amdgpu_device_load_pci_state(struct pci_dev *pdev)
{
	STUB();
	return false;
#ifdef notyet
	struct drm_device *dev = pci_get_drvdata(pdev);
	struct amdgpu_device *adev = drm_to_adev(dev);
	int r;

	if (!adev->pci_state)
		return false;

	r = pci_load_saved_state(pdev, adev->pci_state);

	if (!r) {
		pci_restore_state(pdev);
	} else {
		DRM_WARN("Failed to load PCI state, err:%d\n", r);
		return false;
	}

	return true;
#endif
}
6596 
amdgpu_device_flush_hdp(struct amdgpu_device * adev,struct amdgpu_ring * ring)6597 void amdgpu_device_flush_hdp(struct amdgpu_device *adev,
6598 		struct amdgpu_ring *ring)
6599 {
6600 #ifdef CONFIG_X86_64
6601 	if ((adev->flags & AMD_IS_APU) && !amdgpu_passthrough(adev))
6602 		return;
6603 #endif
6604 	if (adev->gmc.xgmi.connected_to_cpu)
6605 		return;
6606 
6607 	if (ring && ring->funcs->emit_hdp_flush)
6608 		amdgpu_ring_emit_hdp_flush(ring);
6609 	else
6610 		amdgpu_asic_flush_hdp(adev, ring);
6611 }
6612 
amdgpu_device_invalidate_hdp(struct amdgpu_device * adev,struct amdgpu_ring * ring)6613 void amdgpu_device_invalidate_hdp(struct amdgpu_device *adev,
6614 		struct amdgpu_ring *ring)
6615 {
6616 #ifdef CONFIG_X86_64
6617 	if ((adev->flags & AMD_IS_APU) && !amdgpu_passthrough(adev))
6618 		return;
6619 #endif
6620 	if (adev->gmc.xgmi.connected_to_cpu)
6621 		return;
6622 
6623 	amdgpu_asic_invalidate_hdp(adev, ring);
6624 }
6625 
amdgpu_in_reset(struct amdgpu_device * adev)6626 int amdgpu_in_reset(struct amdgpu_device *adev)
6627 {
6628 	return atomic_read(&adev->reset_domain->in_gpu_reset);
6629 }
6630 
6631 /**
6632  * amdgpu_device_halt() - bring hardware to some kind of halt state
6633  *
6634  * @adev: amdgpu_device pointer
6635  *
6636  * Bring hardware to some kind of halt state so that no one can touch it
6637  * any more. It will help to maintain error context when error occurred.
6638  * Compare to a simple hang, the system will keep stable at least for SSH
6639  * access. Then it should be trivial to inspect the hardware state and
6640  * see what's going on. Implemented as following:
6641  *
6642  * 1. drm_dev_unplug() makes device inaccessible to user space(IOCTLs, etc),
6643  *    clears all CPU mappings to device, disallows remappings through page faults
6644  * 2. amdgpu_irq_disable_all() disables all interrupts
6645  * 3. amdgpu_fence_driver_hw_fini() signals all HW fences
6646  * 4. set adev->no_hw_access to avoid potential crashes after setp 5
6647  * 5. amdgpu_device_unmap_mmio() clears all MMIO mappings
6648  * 6. pci_disable_device() and pci_wait_for_pending_transaction()
6649  *    flush any in flight DMA operations
6650  */
amdgpu_device_halt(struct amdgpu_device * adev)6651 void amdgpu_device_halt(struct amdgpu_device *adev)
6652 {
6653 	struct pci_dev *pdev = adev->pdev;
6654 	struct drm_device *ddev = adev_to_drm(adev);
6655 
6656 	amdgpu_xcp_dev_unplug(adev);
6657 	drm_dev_unplug(ddev);
6658 
6659 	amdgpu_irq_disable_all(adev);
6660 
6661 	amdgpu_fence_driver_hw_fini(adev);
6662 
6663 	adev->no_hw_access = true;
6664 
6665 	amdgpu_device_unmap_mmio(adev);
6666 
6667 	pci_disable_device(pdev);
6668 	pci_wait_for_pending_transaction(pdev);
6669 }
6670 
/**
 * amdgpu_device_pcie_port_rreg - read a PCIe port register
 *
 * @adev: amdgpu_device pointer
 * @reg: register index; it is multiplied by 4 before being written to the
 *       index register
 *
 * Performs an indirect read through the index/data register pair whose
 * offsets are supplied by the nbio callbacks. The whole sequence runs
 * under adev->pcie_idx_lock with spin_lock_irqsave().
 *
 * Returns: the value read from the data register.
 */
u32 amdgpu_device_pcie_port_rreg(struct amdgpu_device *adev,
				u32 reg)
{
	unsigned long index_off = adev->nbio.funcs->get_pcie_port_index_offset(adev);
	unsigned long data_off = adev->nbio.funcs->get_pcie_port_data_offset(adev);
	unsigned long irq_flags;
	u32 val;

	spin_lock_irqsave(&adev->pcie_idx_lock, irq_flags);
	WREG32(index_off, reg * 4);
	/* Read the index back; presumably to post the write - confirm. */
	(void)RREG32(index_off);
	val = RREG32(data_off);
	spin_unlock_irqrestore(&adev->pcie_idx_lock, irq_flags);

	return val;
}
6687 
/**
 * amdgpu_device_pcie_port_wreg - write a PCIe port register
 *
 * @adev: amdgpu_device pointer
 * @reg: register index; it is multiplied by 4 before being written to the
 *       index register
 * @v: value to write to the data register
 *
 * Performs an indirect write through the index/data register pair whose
 * offsets are supplied by the nbio callbacks. The whole sequence runs
 * under adev->pcie_idx_lock with spin_lock_irqsave().
 */
void amdgpu_device_pcie_port_wreg(struct amdgpu_device *adev,
				u32 reg, u32 v)
{
	unsigned long index_off = adev->nbio.funcs->get_pcie_port_index_offset(adev);
	unsigned long data_off = adev->nbio.funcs->get_pcie_port_data_offset(adev);
	unsigned long irq_flags;

	spin_lock_irqsave(&adev->pcie_idx_lock, irq_flags);

	/* Select the register; each write is followed by a read-back. */
	WREG32(index_off, reg * 4);
	(void)RREG32(index_off);

	WREG32(data_off, v);
	(void)RREG32(data_off);

	spin_unlock_irqrestore(&adev->pcie_idx_lock, irq_flags);
}
6703 
6704 /**
6705  * amdgpu_device_get_gang - return a reference to the current gang
6706  * @adev: amdgpu_device pointer
6707  *
6708  * Returns: A new reference to the current gang leader.
6709  */
amdgpu_device_get_gang(struct amdgpu_device * adev)6710 struct dma_fence *amdgpu_device_get_gang(struct amdgpu_device *adev)
6711 {
6712 	struct dma_fence *fence;
6713 
6714 	rcu_read_lock();
6715 	fence = dma_fence_get_rcu_safe(&adev->gang_submit);
6716 	rcu_read_unlock();
6717 	return fence;
6718 }
6719 
6720 /**
6721  * amdgpu_device_switch_gang - switch to a new gang
6722  * @adev: amdgpu_device pointer
6723  * @gang: the gang to switch to
6724  *
6725  * Try to switch to a new gang.
6726  * Returns: NULL if we switched to the new gang or a reference to the current
6727  * gang leader.
6728  */
amdgpu_device_switch_gang(struct amdgpu_device * adev,struct dma_fence * gang)6729 struct dma_fence *amdgpu_device_switch_gang(struct amdgpu_device *adev,
6730 					    struct dma_fence *gang)
6731 {
6732 	struct dma_fence *old = NULL;
6733 
6734 	do {
6735 		dma_fence_put(old);
6736 		old = amdgpu_device_get_gang(adev);
6737 		if (old == gang)
6738 			break;
6739 
6740 		if (!dma_fence_is_signaled(old))
6741 			return old;
6742 
6743 	} while (cmpxchg((struct dma_fence __force **)&adev->gang_submit,
6744 			 old, gang) != old);
6745 
6746 	dma_fence_put(old);
6747 	return NULL;
6748 }
6749 
amdgpu_device_has_display_hardware(struct amdgpu_device * adev)6750 bool amdgpu_device_has_display_hardware(struct amdgpu_device *adev)
6751 {
6752 	switch (adev->asic_type) {
6753 #ifdef CONFIG_DRM_AMDGPU_SI
6754 	case CHIP_HAINAN:
6755 #endif
6756 	case CHIP_TOPAZ:
6757 		/* chips with no display hardware */
6758 		return false;
6759 #ifdef CONFIG_DRM_AMDGPU_SI
6760 	case CHIP_TAHITI:
6761 	case CHIP_PITCAIRN:
6762 	case CHIP_VERDE:
6763 	case CHIP_OLAND:
6764 #endif
6765 #ifdef CONFIG_DRM_AMDGPU_CIK
6766 	case CHIP_BONAIRE:
6767 	case CHIP_HAWAII:
6768 	case CHIP_KAVERI:
6769 	case CHIP_KABINI:
6770 	case CHIP_MULLINS:
6771 #endif
6772 	case CHIP_TONGA:
6773 	case CHIP_FIJI:
6774 	case CHIP_POLARIS10:
6775 	case CHIP_POLARIS11:
6776 	case CHIP_POLARIS12:
6777 	case CHIP_VEGAM:
6778 	case CHIP_CARRIZO:
6779 	case CHIP_STONEY:
6780 		/* chips with display hardware */
6781 		return true;
6782 	default:
6783 		/* IP discovery */
6784 		if (!amdgpu_ip_version(adev, DCE_HWIP, 0) ||
6785 		    (adev->harvest_ip_mask & AMD_HARVEST_IP_DMU_MASK))
6786 			return false;
6787 		return true;
6788 	}
6789 }
6790 
amdgpu_device_wait_on_rreg(struct amdgpu_device * adev,uint32_t inst,uint32_t reg_addr,char reg_name[],uint32_t expected_value,uint32_t mask)6791 uint32_t amdgpu_device_wait_on_rreg(struct amdgpu_device *adev,
6792 		uint32_t inst, uint32_t reg_addr, char reg_name[],
6793 		uint32_t expected_value, uint32_t mask)
6794 {
6795 	uint32_t ret = 0;
6796 	uint32_t old_ = 0;
6797 	uint32_t tmp_ = RREG32(reg_addr);
6798 	uint32_t loop = adev->usec_timeout;
6799 
6800 	while ((tmp_ & (mask)) != (expected_value)) {
6801 		if (old_ != tmp_) {
6802 			loop = adev->usec_timeout;
6803 			old_ = tmp_;
6804 		} else
6805 			udelay(1);
6806 		tmp_ = RREG32(reg_addr);
6807 		loop--;
6808 		if (!loop) {
6809 			DRM_WARN("Register(%d) [%s] failed to reach value 0x%08x != 0x%08xn",
6810 				  inst, reg_name, (uint32_t)expected_value,
6811 				  (uint32_t)(tmp_ & (mask)));
6812 			ret = -ETIMEDOUT;
6813 			break;
6814 		}
6815 	}
6816 	return ret;
6817 }
6818