1 /*
2  * Copyright 2014 Advanced Micro Devices, Inc.
3  * Copyright 2008 Red Hat Inc.
4  * Copyright 2009 Jerome Glisse.
5  *
6  * Permission is hereby granted, free of charge, to any person obtaining a
7  * copy of this software and associated documentation files (the "Software"),
8  * to deal in the Software without restriction, including without limitation
9  * the rights to use, copy, modify, merge, publish, distribute, sublicense,
10  * and/or sell copies of the Software, and to permit persons to whom the
11  * Software is furnished to do so, subject to the following conditions:
12  *
13  * The above copyright notice and this permission notice shall be included in
14  * all copies or substantial portions of the Software.
15  *
16  * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
17  * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
18  * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT.  IN NO EVENT SHALL
19  * THE COPYRIGHT HOLDER(S) OR AUTHOR(S) BE LIABLE FOR ANY CLAIM, DAMAGES OR
20  * OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE,
21  * ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR
22  * OTHER DEALINGS IN THE SOFTWARE.
23  *
24  */
25 
26 #include <linux/firmware.h>
27 #include <linux/pm_runtime.h>
28 
29 #include "amdgpu.h"
30 #include "amdgpu_gfx.h"
31 #include "amdgpu_rlc.h"
32 #include "amdgpu_ras.h"
33 #include "amdgpu_reset.h"
34 #include "amdgpu_xcp.h"
35 #include "amdgpu_xgmi.h"
36 
37 /* delay 0.1 second to enable gfx off feature */
38 #define GFX_OFF_DELAY_ENABLE         msecs_to_jiffies(100)
39 
40 #define GFX_OFF_NO_DELAY 0
41 
42 /*
 * GPU GFX IP block helper functions.
44  */
45 
amdgpu_gfx_mec_queue_to_bit(struct amdgpu_device * adev,int mec,int pipe,int queue)46 int amdgpu_gfx_mec_queue_to_bit(struct amdgpu_device *adev, int mec,
47 				int pipe, int queue)
48 {
49 	int bit = 0;
50 
51 	bit += mec * adev->gfx.mec.num_pipe_per_mec
52 		* adev->gfx.mec.num_queue_per_pipe;
53 	bit += pipe * adev->gfx.mec.num_queue_per_pipe;
54 	bit += queue;
55 
56 	return bit;
57 }
58 
amdgpu_queue_mask_bit_to_mec_queue(struct amdgpu_device * adev,int bit,int * mec,int * pipe,int * queue)59 void amdgpu_queue_mask_bit_to_mec_queue(struct amdgpu_device *adev, int bit,
60 				 int *mec, int *pipe, int *queue)
61 {
62 	*queue = bit % adev->gfx.mec.num_queue_per_pipe;
63 	*pipe = (bit / adev->gfx.mec.num_queue_per_pipe)
64 		% adev->gfx.mec.num_pipe_per_mec;
65 	*mec = (bit / adev->gfx.mec.num_queue_per_pipe)
66 	       / adev->gfx.mec.num_pipe_per_mec;
67 
68 }
69 
amdgpu_gfx_is_mec_queue_enabled(struct amdgpu_device * adev,int xcc_id,int mec,int pipe,int queue)70 bool amdgpu_gfx_is_mec_queue_enabled(struct amdgpu_device *adev,
71 				     int xcc_id, int mec, int pipe, int queue)
72 {
73 	return test_bit(amdgpu_gfx_mec_queue_to_bit(adev, mec, pipe, queue),
74 			adev->gfx.mec_bitmap[xcc_id].queue_bitmap);
75 }
76 
amdgpu_gfx_me_queue_to_bit(struct amdgpu_device * adev,int me,int pipe,int queue)77 int amdgpu_gfx_me_queue_to_bit(struct amdgpu_device *adev,
78 			       int me, int pipe, int queue)
79 {
80 	int bit = 0;
81 
82 	bit += me * adev->gfx.me.num_pipe_per_me
83 		* adev->gfx.me.num_queue_per_pipe;
84 	bit += pipe * adev->gfx.me.num_queue_per_pipe;
85 	bit += queue;
86 
87 	return bit;
88 }
89 
amdgpu_gfx_bit_to_me_queue(struct amdgpu_device * adev,int bit,int * me,int * pipe,int * queue)90 void amdgpu_gfx_bit_to_me_queue(struct amdgpu_device *adev, int bit,
91 				int *me, int *pipe, int *queue)
92 {
93 	*queue = bit % adev->gfx.me.num_queue_per_pipe;
94 	*pipe = (bit / adev->gfx.me.num_queue_per_pipe)
95 		% adev->gfx.me.num_pipe_per_me;
96 	*me = (bit / adev->gfx.me.num_queue_per_pipe)
97 		/ adev->gfx.me.num_pipe_per_me;
98 }
99 
amdgpu_gfx_is_me_queue_enabled(struct amdgpu_device * adev,int me,int pipe,int queue)100 bool amdgpu_gfx_is_me_queue_enabled(struct amdgpu_device *adev,
101 				    int me, int pipe, int queue)
102 {
103 	return test_bit(amdgpu_gfx_me_queue_to_bit(adev, me, pipe, queue),
104 			adev->gfx.me.queue_bitmap);
105 }
106 
107 /**
108  * amdgpu_gfx_parse_disable_cu - Parse the disable_cu module parameter
109  *
110  * @mask: array in which the per-shader array disable masks will be stored
111  * @max_se: number of SEs
112  * @max_sh: number of SHs
113  *
114  * The bitmask of CUs to be disabled in the shader array determined by se and
115  * sh is stored in mask[se * max_sh + sh].
116  */
amdgpu_gfx_parse_disable_cu(unsigned int * mask,unsigned int max_se,unsigned int max_sh)117 void amdgpu_gfx_parse_disable_cu(unsigned int *mask, unsigned int max_se, unsigned int max_sh)
118 {
119 	unsigned int se, sh, cu;
120 	const char *p;
121 
122 	memset(mask, 0, sizeof(*mask) * max_se * max_sh);
123 
124 	if (!amdgpu_disable_cu || !*amdgpu_disable_cu)
125 		return;
126 
127 #ifdef notyet
128 	p = amdgpu_disable_cu;
129 	for (;;) {
130 		char *next;
131 		int ret = sscanf(p, "%u.%u.%u", &se, &sh, &cu);
132 
133 		if (ret < 3) {
134 			DRM_ERROR("amdgpu: could not parse disable_cu\n");
135 			return;
136 		}
137 
138 		if (se < max_se && sh < max_sh && cu < 16) {
139 			DRM_INFO("amdgpu: disabling CU %u.%u.%u\n", se, sh, cu);
140 			mask[se * max_sh + sh] |= 1u << cu;
141 		} else {
142 			DRM_ERROR("amdgpu: disable_cu %u.%u.%u is out of range\n",
143 				  se, sh, cu);
144 		}
145 
146 		next = strchr(p, ',');
147 		if (!next)
148 			break;
149 		p = next + 1;
150 	}
151 #endif
152 }
153 
amdgpu_gfx_is_graphics_multipipe_capable(struct amdgpu_device * adev)154 static bool amdgpu_gfx_is_graphics_multipipe_capable(struct amdgpu_device *adev)
155 {
156 	return amdgpu_async_gfx_ring && adev->gfx.me.num_pipe_per_me > 1;
157 }
158 
amdgpu_gfx_is_compute_multipipe_capable(struct amdgpu_device * adev)159 static bool amdgpu_gfx_is_compute_multipipe_capable(struct amdgpu_device *adev)
160 {
161 	if (amdgpu_compute_multipipe != -1) {
162 		DRM_INFO("amdgpu: forcing compute pipe policy %d\n",
163 			 amdgpu_compute_multipipe);
164 		return amdgpu_compute_multipipe == 1;
165 	}
166 
167 	if (amdgpu_ip_version(adev, GC_HWIP, 0) > IP_VERSION(9, 0, 0))
168 		return true;
169 
170 	/* FIXME: spreading the queues across pipes causes perf regressions
171 	 * on POLARIS11 compute workloads */
172 	if (adev->asic_type == CHIP_POLARIS11)
173 		return false;
174 
175 	return adev->gfx.mec.num_mec > 1;
176 }
177 
amdgpu_gfx_is_high_priority_graphics_queue(struct amdgpu_device * adev,struct amdgpu_ring * ring)178 bool amdgpu_gfx_is_high_priority_graphics_queue(struct amdgpu_device *adev,
179 						struct amdgpu_ring *ring)
180 {
181 	int queue = ring->queue;
182 	int pipe = ring->pipe;
183 
184 	/* Policy: use pipe1 queue0 as high priority graphics queue if we
185 	 * have more than one gfx pipe.
186 	 */
187 	if (amdgpu_gfx_is_graphics_multipipe_capable(adev) &&
188 	    adev->gfx.num_gfx_rings > 1 && pipe == 1 && queue == 0) {
189 		int me = ring->me;
190 		int bit;
191 
192 		bit = amdgpu_gfx_me_queue_to_bit(adev, me, pipe, queue);
193 		if (ring == &adev->gfx.gfx_ring[bit])
194 			return true;
195 	}
196 
197 	return false;
198 }
199 
amdgpu_gfx_is_high_priority_compute_queue(struct amdgpu_device * adev,struct amdgpu_ring * ring)200 bool amdgpu_gfx_is_high_priority_compute_queue(struct amdgpu_device *adev,
201 					       struct amdgpu_ring *ring)
202 {
203 	/* Policy: use 1st queue as high priority compute queue if we
204 	 * have more than one compute queue.
205 	 */
206 	if (adev->gfx.num_compute_rings > 1 &&
207 	    ring == &adev->gfx.compute_ring[0])
208 		return true;
209 
210 	return false;
211 }
212 
amdgpu_gfx_compute_queue_acquire(struct amdgpu_device * adev)213 void amdgpu_gfx_compute_queue_acquire(struct amdgpu_device *adev)
214 {
215 	int i, j, queue, pipe;
216 	bool multipipe_policy = amdgpu_gfx_is_compute_multipipe_capable(adev);
217 	int max_queues_per_mec = min(adev->gfx.mec.num_pipe_per_mec *
218 				     adev->gfx.mec.num_queue_per_pipe,
219 				     adev->gfx.num_compute_rings);
220 	int num_xcc = adev->gfx.xcc_mask ? NUM_XCC(adev->gfx.xcc_mask) : 1;
221 
222 	if (multipipe_policy) {
223 		/* policy: make queues evenly cross all pipes on MEC1 only
224 		 * for multiple xcc, just use the original policy for simplicity */
225 		for (j = 0; j < num_xcc; j++) {
226 			for (i = 0; i < max_queues_per_mec; i++) {
227 				pipe = i % adev->gfx.mec.num_pipe_per_mec;
228 				queue = (i / adev->gfx.mec.num_pipe_per_mec) %
229 					 adev->gfx.mec.num_queue_per_pipe;
230 
231 				set_bit(pipe * adev->gfx.mec.num_queue_per_pipe + queue,
232 					adev->gfx.mec_bitmap[j].queue_bitmap);
233 			}
234 		}
235 	} else {
236 		/* policy: amdgpu owns all queues in the given pipe */
237 		for (j = 0; j < num_xcc; j++) {
238 			for (i = 0; i < max_queues_per_mec; ++i)
239 				set_bit(i, adev->gfx.mec_bitmap[j].queue_bitmap);
240 		}
241 	}
242 
243 	for (j = 0; j < num_xcc; j++) {
244 		dev_dbg(adev->dev, "mec queue bitmap weight=%d\n",
245 			bitmap_weight(adev->gfx.mec_bitmap[j].queue_bitmap, AMDGPU_MAX_COMPUTE_QUEUES));
246 	}
247 }
248 
amdgpu_gfx_graphics_queue_acquire(struct amdgpu_device * adev)249 void amdgpu_gfx_graphics_queue_acquire(struct amdgpu_device *adev)
250 {
251 	int i, queue, pipe;
252 	bool multipipe_policy = amdgpu_gfx_is_graphics_multipipe_capable(adev);
253 	int max_queues_per_me = adev->gfx.me.num_pipe_per_me *
254 					adev->gfx.me.num_queue_per_pipe;
255 
256 	if (multipipe_policy) {
257 		/* policy: amdgpu owns the first queue per pipe at this stage
258 		 * will extend to mulitple queues per pipe later */
259 		for (i = 0; i < max_queues_per_me; i++) {
260 			pipe = i % adev->gfx.me.num_pipe_per_me;
261 			queue = (i / adev->gfx.me.num_pipe_per_me) %
262 				adev->gfx.me.num_queue_per_pipe;
263 
264 			set_bit(pipe * adev->gfx.me.num_queue_per_pipe + queue,
265 				adev->gfx.me.queue_bitmap);
266 		}
267 	} else {
268 		for (i = 0; i < max_queues_per_me; ++i)
269 			set_bit(i, adev->gfx.me.queue_bitmap);
270 	}
271 
272 	/* update the number of active graphics rings */
273 	adev->gfx.num_gfx_rings =
274 		bitmap_weight(adev->gfx.me.queue_bitmap, AMDGPU_MAX_GFX_QUEUES);
275 }
276 
amdgpu_gfx_kiq_acquire(struct amdgpu_device * adev,struct amdgpu_ring * ring,int xcc_id)277 static int amdgpu_gfx_kiq_acquire(struct amdgpu_device *adev,
278 				  struct amdgpu_ring *ring, int xcc_id)
279 {
280 	int queue_bit;
281 	int mec, pipe, queue;
282 
283 	queue_bit = adev->gfx.mec.num_mec
284 		    * adev->gfx.mec.num_pipe_per_mec
285 		    * adev->gfx.mec.num_queue_per_pipe;
286 
287 	while (--queue_bit >= 0) {
288 		if (test_bit(queue_bit, adev->gfx.mec_bitmap[xcc_id].queue_bitmap))
289 			continue;
290 
291 		amdgpu_queue_mask_bit_to_mec_queue(adev, queue_bit, &mec, &pipe, &queue);
292 
293 		/*
294 		 * 1. Using pipes 2/3 from MEC 2 seems cause problems.
295 		 * 2. It must use queue id 0, because CGPG_IDLE/SAVE/LOAD/RUN
296 		 * only can be issued on queue 0.
297 		 */
298 		if ((mec == 1 && pipe > 1) || queue != 0)
299 			continue;
300 
301 		ring->me = mec + 1;
302 		ring->pipe = pipe;
303 		ring->queue = queue;
304 
305 		return 0;
306 	}
307 
308 	dev_err(adev->dev, "Failed to find a queue for KIQ\n");
309 	return -EINVAL;
310 }
311 
amdgpu_gfx_kiq_init_ring(struct amdgpu_device * adev,int xcc_id)312 int amdgpu_gfx_kiq_init_ring(struct amdgpu_device *adev, int xcc_id)
313 {
314 	struct amdgpu_kiq *kiq = &adev->gfx.kiq[xcc_id];
315 	struct amdgpu_irq_src *irq = &kiq->irq;
316 	struct amdgpu_ring *ring = &kiq->ring;
317 	int r = 0;
318 
319 	mtx_init(&kiq->ring_lock, IPL_TTY);
320 
321 	ring->adev = NULL;
322 	ring->ring_obj = NULL;
323 	ring->use_doorbell = true;
324 	ring->xcc_id = xcc_id;
325 	ring->vm_hub = AMDGPU_GFXHUB(xcc_id);
326 	ring->doorbell_index =
327 		(adev->doorbell_index.kiq +
328 		 xcc_id * adev->doorbell_index.xcc_doorbell_range)
329 		<< 1;
330 
331 	r = amdgpu_gfx_kiq_acquire(adev, ring, xcc_id);
332 	if (r)
333 		return r;
334 
335 	ring->eop_gpu_addr = kiq->eop_gpu_addr;
336 	ring->no_scheduler = true;
337 	snprintf(ring->name, sizeof(ring->name), "kiq_%hhu.%hhu.%hhu.%hhu",
338 		 (unsigned char)xcc_id, (unsigned char)ring->me,
339 		 (unsigned char)ring->pipe, (unsigned char)ring->queue);
340 	r = amdgpu_ring_init(adev, ring, 1024, irq, AMDGPU_CP_KIQ_IRQ_DRIVER0,
341 			     AMDGPU_RING_PRIO_DEFAULT, NULL);
342 	if (r)
343 		dev_warn(adev->dev, "(%d) failed to init kiq ring\n", r);
344 
345 	return r;
346 }
347 
/* Tear down a KIQ ring; thin wrapper around amdgpu_ring_fini(). */
void amdgpu_gfx_kiq_free_ring(struct amdgpu_ring *ring)
{
	amdgpu_ring_fini(ring);
}
352 
amdgpu_gfx_kiq_fini(struct amdgpu_device * adev,int xcc_id)353 void amdgpu_gfx_kiq_fini(struct amdgpu_device *adev, int xcc_id)
354 {
355 	struct amdgpu_kiq *kiq = &adev->gfx.kiq[xcc_id];
356 
357 	amdgpu_bo_free_kernel(&kiq->eop_obj, &kiq->eop_gpu_addr, NULL);
358 }
359 
amdgpu_gfx_kiq_init(struct amdgpu_device * adev,unsigned int hpd_size,int xcc_id)360 int amdgpu_gfx_kiq_init(struct amdgpu_device *adev,
361 			unsigned int hpd_size, int xcc_id)
362 {
363 	int r;
364 	u32 *hpd;
365 	struct amdgpu_kiq *kiq = &adev->gfx.kiq[xcc_id];
366 
367 	r = amdgpu_bo_create_kernel(adev, hpd_size, PAGE_SIZE,
368 				    AMDGPU_GEM_DOMAIN_GTT, &kiq->eop_obj,
369 				    &kiq->eop_gpu_addr, (void **)&hpd);
370 	if (r) {
371 		dev_warn(adev->dev, "failed to create KIQ bo (%d).\n", r);
372 		return r;
373 	}
374 
375 	memset(hpd, 0, hpd_size);
376 
377 	r = amdgpu_bo_reserve(kiq->eop_obj, true);
378 	if (unlikely(r != 0))
379 		dev_warn(adev->dev, "(%d) reserve kiq eop bo failed\n", r);
380 	amdgpu_bo_kunmap(kiq->eop_obj);
381 	amdgpu_bo_unreserve(kiq->eop_obj);
382 
383 	return 0;
384 }
385 
386 /* create MQD for each compute/gfx queue */
amdgpu_gfx_mqd_sw_init(struct amdgpu_device * adev,unsigned int mqd_size,int xcc_id)387 int amdgpu_gfx_mqd_sw_init(struct amdgpu_device *adev,
388 			   unsigned int mqd_size, int xcc_id)
389 {
390 	int r, i, j;
391 	struct amdgpu_kiq *kiq = &adev->gfx.kiq[xcc_id];
392 	struct amdgpu_ring *ring = &kiq->ring;
393 	u32 domain = AMDGPU_GEM_DOMAIN_GTT;
394 
395 #if !defined(CONFIG_ARM) && !defined(CONFIG_ARM64)
396 	/* Only enable on gfx10 and 11 for now to avoid changing behavior on older chips */
397 	if (amdgpu_ip_version(adev, GC_HWIP, 0) >= IP_VERSION(10, 0, 0))
398 		domain |= AMDGPU_GEM_DOMAIN_VRAM;
399 #endif
400 
401 	/* create MQD for KIQ */
402 	if (!adev->enable_mes_kiq && !ring->mqd_obj) {
403 		/* originaly the KIQ MQD is put in GTT domain, but for SRIOV VRAM domain is a must
404 		 * otherwise hypervisor trigger SAVE_VF fail after driver unloaded which mean MQD
405 		 * deallocated and gart_unbind, to strict diverage we decide to use VRAM domain for
406 		 * KIQ MQD no matter SRIOV or Bare-metal
407 		 */
408 		r = amdgpu_bo_create_kernel(adev, mqd_size, PAGE_SIZE,
409 					    AMDGPU_GEM_DOMAIN_VRAM |
410 					    AMDGPU_GEM_DOMAIN_GTT,
411 					    &ring->mqd_obj,
412 					    &ring->mqd_gpu_addr,
413 					    &ring->mqd_ptr);
414 		if (r) {
415 			dev_warn(adev->dev, "failed to create ring mqd ob (%d)", r);
416 			return r;
417 		}
418 
419 		/* prepare MQD backup */
420 		kiq->mqd_backup = kmalloc(mqd_size, GFP_KERNEL);
421 		if (!kiq->mqd_backup) {
422 			dev_warn(adev->dev,
423 				 "no memory to create MQD backup for ring %s\n", ring->name);
424 			return -ENOMEM;
425 		}
426 	}
427 
428 	if (adev->asic_type >= CHIP_NAVI10 && amdgpu_async_gfx_ring) {
429 		/* create MQD for each KGQ */
430 		for (i = 0; i < adev->gfx.num_gfx_rings; i++) {
431 			ring = &adev->gfx.gfx_ring[i];
432 			if (!ring->mqd_obj) {
433 				r = amdgpu_bo_create_kernel(adev, mqd_size, PAGE_SIZE,
434 							    domain, &ring->mqd_obj,
435 							    &ring->mqd_gpu_addr, &ring->mqd_ptr);
436 				if (r) {
437 					dev_warn(adev->dev, "failed to create ring mqd bo (%d)", r);
438 					return r;
439 				}
440 
441 				ring->mqd_size = mqd_size;
442 				/* prepare MQD backup */
443 				adev->gfx.me.mqd_backup[i] = kmalloc(mqd_size, GFP_KERNEL);
444 				if (!adev->gfx.me.mqd_backup[i]) {
445 					dev_warn(adev->dev, "no memory to create MQD backup for ring %s\n", ring->name);
446 					return -ENOMEM;
447 				}
448 			}
449 		}
450 	}
451 
452 	/* create MQD for each KCQ */
453 	for (i = 0; i < adev->gfx.num_compute_rings; i++) {
454 		j = i + xcc_id * adev->gfx.num_compute_rings;
455 		ring = &adev->gfx.compute_ring[j];
456 		if (!ring->mqd_obj) {
457 			r = amdgpu_bo_create_kernel(adev, mqd_size, PAGE_SIZE,
458 						    domain, &ring->mqd_obj,
459 						    &ring->mqd_gpu_addr, &ring->mqd_ptr);
460 			if (r) {
461 				dev_warn(adev->dev, "failed to create ring mqd bo (%d)", r);
462 				return r;
463 			}
464 
465 			ring->mqd_size = mqd_size;
466 			/* prepare MQD backup */
467 			adev->gfx.mec.mqd_backup[j] = kmalloc(mqd_size, GFP_KERNEL);
468 			if (!adev->gfx.mec.mqd_backup[j]) {
469 				dev_warn(adev->dev, "no memory to create MQD backup for ring %s\n", ring->name);
470 				return -ENOMEM;
471 			}
472 		}
473 	}
474 
475 	return 0;
476 }
477 
amdgpu_gfx_mqd_sw_fini(struct amdgpu_device * adev,int xcc_id)478 void amdgpu_gfx_mqd_sw_fini(struct amdgpu_device *adev, int xcc_id)
479 {
480 	struct amdgpu_ring *ring = NULL;
481 	int i, j;
482 	struct amdgpu_kiq *kiq = &adev->gfx.kiq[xcc_id];
483 
484 	if (adev->asic_type >= CHIP_NAVI10 && amdgpu_async_gfx_ring) {
485 		for (i = 0; i < adev->gfx.num_gfx_rings; i++) {
486 			ring = &adev->gfx.gfx_ring[i];
487 			kfree(adev->gfx.me.mqd_backup[i]);
488 			amdgpu_bo_free_kernel(&ring->mqd_obj,
489 					      &ring->mqd_gpu_addr,
490 					      &ring->mqd_ptr);
491 		}
492 	}
493 
494 	for (i = 0; i < adev->gfx.num_compute_rings; i++) {
495 		j = i + xcc_id * adev->gfx.num_compute_rings;
496 		ring = &adev->gfx.compute_ring[j];
497 		kfree(adev->gfx.mec.mqd_backup[j]);
498 		amdgpu_bo_free_kernel(&ring->mqd_obj,
499 				      &ring->mqd_gpu_addr,
500 				      &ring->mqd_ptr);
501 	}
502 
503 	ring = &kiq->ring;
504 	kfree(kiq->mqd_backup);
505 	amdgpu_bo_free_kernel(&ring->mqd_obj,
506 			      &ring->mqd_gpu_addr,
507 			      &ring->mqd_ptr);
508 }
509 
amdgpu_gfx_disable_kcq(struct amdgpu_device * adev,int xcc_id)510 int amdgpu_gfx_disable_kcq(struct amdgpu_device *adev, int xcc_id)
511 {
512 	struct amdgpu_kiq *kiq = &adev->gfx.kiq[xcc_id];
513 	struct amdgpu_ring *kiq_ring = &kiq->ring;
514 	int i, r = 0;
515 	int j;
516 
517 	if (adev->enable_mes) {
518 		for (i = 0; i < adev->gfx.num_compute_rings; i++) {
519 			j = i + xcc_id * adev->gfx.num_compute_rings;
520 			amdgpu_mes_unmap_legacy_queue(adev,
521 						   &adev->gfx.compute_ring[j],
522 						   RESET_QUEUES, 0, 0);
523 		}
524 		return 0;
525 	}
526 
527 	if (!kiq->pmf || !kiq->pmf->kiq_unmap_queues)
528 		return -EINVAL;
529 
530 	if (!kiq_ring->sched.ready || adev->job_hang)
531 		return 0;
532 	/**
533 	 * This is workaround: only skip kiq_ring test
534 	 * during ras recovery in suspend stage for gfx9.4.3
535 	 */
536 	if ((amdgpu_ip_version(adev, GC_HWIP, 0) == IP_VERSION(9, 4, 3) ||
537 	     amdgpu_ip_version(adev, GC_HWIP, 0) == IP_VERSION(9, 4, 4)) &&
538 	    amdgpu_ras_in_recovery(adev))
539 		return 0;
540 
541 	spin_lock(&kiq->ring_lock);
542 	if (amdgpu_ring_alloc(kiq_ring, kiq->pmf->unmap_queues_size *
543 					adev->gfx.num_compute_rings)) {
544 		spin_unlock(&kiq->ring_lock);
545 		return -ENOMEM;
546 	}
547 
548 	for (i = 0; i < adev->gfx.num_compute_rings; i++) {
549 		j = i + xcc_id * adev->gfx.num_compute_rings;
550 		kiq->pmf->kiq_unmap_queues(kiq_ring,
551 					   &adev->gfx.compute_ring[j],
552 					   RESET_QUEUES, 0, 0);
553 	}
554 	/* Submit unmap queue packet */
555 	amdgpu_ring_commit(kiq_ring);
556 	/*
557 	 * Ring test will do a basic scratch register change check. Just run
558 	 * this to ensure that unmap queues that is submitted before got
559 	 * processed successfully before returning.
560 	 */
561 	r = amdgpu_ring_test_helper(kiq_ring);
562 
563 	spin_unlock(&kiq->ring_lock);
564 
565 	return r;
566 }
567 
amdgpu_gfx_disable_kgq(struct amdgpu_device * adev,int xcc_id)568 int amdgpu_gfx_disable_kgq(struct amdgpu_device *adev, int xcc_id)
569 {
570 	struct amdgpu_kiq *kiq = &adev->gfx.kiq[xcc_id];
571 	struct amdgpu_ring *kiq_ring = &kiq->ring;
572 	int i, r = 0;
573 	int j;
574 
575 	if (adev->enable_mes) {
576 		if (amdgpu_gfx_is_master_xcc(adev, xcc_id)) {
577 			for (i = 0; i < adev->gfx.num_gfx_rings; i++) {
578 				j = i + xcc_id * adev->gfx.num_gfx_rings;
579 				amdgpu_mes_unmap_legacy_queue(adev,
580 						      &adev->gfx.gfx_ring[j],
581 						      PREEMPT_QUEUES, 0, 0);
582 			}
583 		}
584 		return 0;
585 	}
586 
587 	if (!kiq->pmf || !kiq->pmf->kiq_unmap_queues)
588 		return -EINVAL;
589 
590 	if (!adev->gfx.kiq[0].ring.sched.ready || adev->job_hang)
591 		return 0;
592 
593 	if (amdgpu_gfx_is_master_xcc(adev, xcc_id)) {
594 		spin_lock(&kiq->ring_lock);
595 		if (amdgpu_ring_alloc(kiq_ring, kiq->pmf->unmap_queues_size *
596 						adev->gfx.num_gfx_rings)) {
597 			spin_unlock(&kiq->ring_lock);
598 			return -ENOMEM;
599 		}
600 
601 		for (i = 0; i < adev->gfx.num_gfx_rings; i++) {
602 			j = i + xcc_id * adev->gfx.num_gfx_rings;
603 			kiq->pmf->kiq_unmap_queues(kiq_ring,
604 						   &adev->gfx.gfx_ring[j],
605 						   PREEMPT_QUEUES, 0, 0);
606 		}
607 		/* Submit unmap queue packet */
608 		amdgpu_ring_commit(kiq_ring);
609 
610 		/*
611 		 * Ring test will do a basic scratch register change check.
612 		 * Just run this to ensure that unmap queues that is submitted
613 		 * before got processed successfully before returning.
614 		 */
615 		r = amdgpu_ring_test_helper(kiq_ring);
616 		spin_unlock(&kiq->ring_lock);
617 	}
618 
619 	return r;
620 }
621 
/*
 * Convert a linear MEC queue bit index into the bit position used in the
 * queue mask handed to kiq_set_resources(): mec * 4 * 8 + pipe * 8 + queue.
 */
int amdgpu_queue_mask_bit_to_set_resource_bit(struct amdgpu_device *adev,
					int queue_bit)
{
	int mec, pipe, queue;

	amdgpu_queue_mask_bit_to_mec_queue(adev, queue_bit, &mec, &pipe, &queue);

	return mec * 4 * 8 + pipe * 8 + queue;
}
634 
amdgpu_gfx_mes_enable_kcq(struct amdgpu_device * adev,int xcc_id)635 static int amdgpu_gfx_mes_enable_kcq(struct amdgpu_device *adev, int xcc_id)
636 {
637 	struct amdgpu_kiq *kiq = &adev->gfx.kiq[xcc_id];
638 	struct amdgpu_ring *kiq_ring = &kiq->ring;
639 	uint64_t queue_mask = ~0ULL;
640 	int r, i, j;
641 
642 	amdgpu_device_flush_hdp(adev, NULL);
643 
644 	if (!adev->enable_uni_mes) {
645 		spin_lock(&kiq->ring_lock);
646 		r = amdgpu_ring_alloc(kiq_ring, kiq->pmf->set_resources_size);
647 		if (r) {
648 			dev_err(adev->dev, "Failed to lock KIQ (%d).\n", r);
649 			spin_unlock(&kiq->ring_lock);
650 			return r;
651 		}
652 
653 		kiq->pmf->kiq_set_resources(kiq_ring, queue_mask);
654 		r = amdgpu_ring_test_helper(kiq_ring);
655 		spin_unlock(&kiq->ring_lock);
656 		if (r)
657 			dev_err(adev->dev, "KIQ failed to set resources\n");
658 	}
659 
660 	for (i = 0; i < adev->gfx.num_compute_rings; i++) {
661 		j = i + xcc_id * adev->gfx.num_compute_rings;
662 		r = amdgpu_mes_map_legacy_queue(adev,
663 						&adev->gfx.compute_ring[j]);
664 		if (r) {
665 			dev_err(adev->dev, "failed to map compute queue\n");
666 			return r;
667 		}
668 	}
669 
670 	return 0;
671 }
672 
amdgpu_gfx_enable_kcq(struct amdgpu_device * adev,int xcc_id)673 int amdgpu_gfx_enable_kcq(struct amdgpu_device *adev, int xcc_id)
674 {
675 	struct amdgpu_kiq *kiq = &adev->gfx.kiq[xcc_id];
676 	struct amdgpu_ring *kiq_ring = &kiq->ring;
677 	uint64_t queue_mask = 0;
678 	int r, i, j;
679 
680 	if (adev->mes.enable_legacy_queue_map)
681 		return amdgpu_gfx_mes_enable_kcq(adev, xcc_id);
682 
683 	if (!kiq->pmf || !kiq->pmf->kiq_map_queues || !kiq->pmf->kiq_set_resources)
684 		return -EINVAL;
685 
686 	for (i = 0; i < AMDGPU_MAX_COMPUTE_QUEUES; ++i) {
687 		if (!test_bit(i, adev->gfx.mec_bitmap[xcc_id].queue_bitmap))
688 			continue;
689 
690 		/* This situation may be hit in the future if a new HW
691 		 * generation exposes more than 64 queues. If so, the
692 		 * definition of queue_mask needs updating */
693 		if (WARN_ON(i > (sizeof(queue_mask)*8))) {
694 			DRM_ERROR("Invalid KCQ enabled: %d\n", i);
695 			break;
696 		}
697 
698 		queue_mask |= (1ull << amdgpu_queue_mask_bit_to_set_resource_bit(adev, i));
699 	}
700 
701 	amdgpu_device_flush_hdp(adev, NULL);
702 
703 	DRM_INFO("kiq ring mec %d pipe %d q %d\n", kiq_ring->me, kiq_ring->pipe,
704 		 kiq_ring->queue);
705 
706 	spin_lock(&kiq->ring_lock);
707 	r = amdgpu_ring_alloc(kiq_ring, kiq->pmf->map_queues_size *
708 					adev->gfx.num_compute_rings +
709 					kiq->pmf->set_resources_size);
710 	if (r) {
711 		DRM_ERROR("Failed to lock KIQ (%d).\n", r);
712 		spin_unlock(&kiq->ring_lock);
713 		return r;
714 	}
715 
716 	kiq->pmf->kiq_set_resources(kiq_ring, queue_mask);
717 	for (i = 0; i < adev->gfx.num_compute_rings; i++) {
718 		j = i + xcc_id * adev->gfx.num_compute_rings;
719 		kiq->pmf->kiq_map_queues(kiq_ring,
720 					 &adev->gfx.compute_ring[j]);
721 	}
722 	/* Submit map queue packet */
723 	amdgpu_ring_commit(kiq_ring);
724 	/*
725 	 * Ring test will do a basic scratch register change check. Just run
726 	 * this to ensure that map queues that is submitted before got
727 	 * processed successfully before returning.
728 	 */
729 	r = amdgpu_ring_test_helper(kiq_ring);
730 	spin_unlock(&kiq->ring_lock);
731 	if (r)
732 		DRM_ERROR("KCQ enable failed\n");
733 
734 	return r;
735 }
736 
amdgpu_gfx_enable_kgq(struct amdgpu_device * adev,int xcc_id)737 int amdgpu_gfx_enable_kgq(struct amdgpu_device *adev, int xcc_id)
738 {
739 	struct amdgpu_kiq *kiq = &adev->gfx.kiq[xcc_id];
740 	struct amdgpu_ring *kiq_ring = &kiq->ring;
741 	int r, i, j;
742 
743 	if (!kiq->pmf || !kiq->pmf->kiq_map_queues)
744 		return -EINVAL;
745 
746 	amdgpu_device_flush_hdp(adev, NULL);
747 
748 	if (adev->mes.enable_legacy_queue_map) {
749 		for (i = 0; i < adev->gfx.num_gfx_rings; i++) {
750 			j = i + xcc_id * adev->gfx.num_gfx_rings;
751 			r = amdgpu_mes_map_legacy_queue(adev,
752 							&adev->gfx.gfx_ring[j]);
753 			if (r) {
754 				DRM_ERROR("failed to map gfx queue\n");
755 				return r;
756 			}
757 		}
758 
759 		return 0;
760 	}
761 
762 	spin_lock(&kiq->ring_lock);
763 	/* No need to map kcq on the slave */
764 	if (amdgpu_gfx_is_master_xcc(adev, xcc_id)) {
765 		r = amdgpu_ring_alloc(kiq_ring, kiq->pmf->map_queues_size *
766 						adev->gfx.num_gfx_rings);
767 		if (r) {
768 			DRM_ERROR("Failed to lock KIQ (%d).\n", r);
769 			spin_unlock(&kiq->ring_lock);
770 			return r;
771 		}
772 
773 		for (i = 0; i < adev->gfx.num_gfx_rings; i++) {
774 			j = i + xcc_id * adev->gfx.num_gfx_rings;
775 			kiq->pmf->kiq_map_queues(kiq_ring,
776 						 &adev->gfx.gfx_ring[j]);
777 		}
778 	}
779 	/* Submit map queue packet */
780 	amdgpu_ring_commit(kiq_ring);
781 	/*
782 	 * Ring test will do a basic scratch register change check. Just run
783 	 * this to ensure that map queues that is submitted before got
784 	 * processed successfully before returning.
785 	 */
786 	r = amdgpu_ring_test_helper(kiq_ring);
787 	spin_unlock(&kiq->ring_lock);
788 	if (r)
789 		DRM_ERROR("KGQ enable failed\n");
790 
791 	return r;
792 }
793 
794 /* amdgpu_gfx_off_ctrl - Handle gfx off feature enable/disable
795  *
796  * @adev: amdgpu_device pointer
797  * @bool enable true: enable gfx off feature, false: disable gfx off feature
798  *
799  * 1. gfx off feature will be enabled by gfx ip after gfx cg gp enabled.
800  * 2. other client can send request to disable gfx off feature, the request should be honored.
801  * 3. other client can cancel their request of disable gfx off feature
802  * 4. other client should not send request to enable gfx off feature before disable gfx off feature.
803  */
804 
amdgpu_gfx_off_ctrl(struct amdgpu_device * adev,bool enable)805 void amdgpu_gfx_off_ctrl(struct amdgpu_device *adev, bool enable)
806 {
807 	unsigned long delay = GFX_OFF_DELAY_ENABLE;
808 
809 	if (!(adev->pm.pp_feature & PP_GFXOFF_MASK))
810 		return;
811 
812 	mutex_lock(&adev->gfx.gfx_off_mutex);
813 
814 	if (enable) {
815 		/* If the count is already 0, it means there's an imbalance bug somewhere.
816 		 * Note that the bug may be in a different caller than the one which triggers the
817 		 * WARN_ON_ONCE.
818 		 */
819 		if (WARN_ON_ONCE(adev->gfx.gfx_off_req_count == 0))
820 			goto unlock;
821 
822 		adev->gfx.gfx_off_req_count--;
823 
824 		if (adev->gfx.gfx_off_req_count == 0 &&
825 		    !adev->gfx.gfx_off_state) {
826 			/* If going to s2idle, no need to wait */
827 			if (adev->in_s0ix) {
828 				if (!amdgpu_dpm_set_powergating_by_smu(adev,
829 						AMD_IP_BLOCK_TYPE_GFX, true))
830 					adev->gfx.gfx_off_state = true;
831 			} else {
832 				schedule_delayed_work(&adev->gfx.gfx_off_delay_work,
833 					      delay);
834 			}
835 		}
836 	} else {
837 		if (adev->gfx.gfx_off_req_count == 0) {
838 			cancel_delayed_work_sync(&adev->gfx.gfx_off_delay_work);
839 
840 			if (adev->gfx.gfx_off_state &&
841 			    !amdgpu_dpm_set_powergating_by_smu(adev, AMD_IP_BLOCK_TYPE_GFX, false)) {
842 				adev->gfx.gfx_off_state = false;
843 
844 				if (adev->gfx.funcs->init_spm_golden) {
845 					dev_dbg(adev->dev,
846 						"GFXOFF is disabled, re-init SPM golden settings\n");
847 					amdgpu_gfx_init_spm_golden(adev);
848 				}
849 			}
850 		}
851 
852 		adev->gfx.gfx_off_req_count++;
853 	}
854 
855 unlock:
856 	mutex_unlock(&adev->gfx.gfx_off_mutex);
857 }
858 
/*
 * Forward @value to amdgpu_dpm_set_residency_gfxoff() while holding
 * gfx_off_mutex, and return its result.
 */
int amdgpu_set_gfx_off_residency(struct amdgpu_device *adev, bool value)
{
	int ret;

	mutex_lock(&adev->gfx.gfx_off_mutex);
	ret = amdgpu_dpm_set_residency_gfxoff(adev, value);
	mutex_unlock(&adev->gfx.gfx_off_mutex);

	return ret;
}
871 
/*
 * Query amdgpu_dpm_get_residency_gfxoff() into @value while holding
 * gfx_off_mutex, and return its result.
 */
int amdgpu_get_gfx_off_residency(struct amdgpu_device *adev, u32 *value)
{
	int ret;

	mutex_lock(&adev->gfx.gfx_off_mutex);
	ret = amdgpu_dpm_get_residency_gfxoff(adev, value);
	mutex_unlock(&adev->gfx.gfx_off_mutex);

	return ret;
}
884 
/*
 * Query amdgpu_dpm_get_entrycount_gfxoff() into @value while holding
 * gfx_off_mutex, and return its result.
 */
int amdgpu_get_gfx_off_entrycount(struct amdgpu_device *adev, u64 *value)
{
	int ret;

	mutex_lock(&adev->gfx.gfx_off_mutex);
	ret = amdgpu_dpm_get_entrycount_gfxoff(adev, value);
	mutex_unlock(&adev->gfx.gfx_off_mutex);

	return ret;
}
897 
amdgpu_get_gfx_off_status(struct amdgpu_device * adev,uint32_t * value)898 int amdgpu_get_gfx_off_status(struct amdgpu_device *adev, uint32_t *value)
899 {
900 
901 	int r = 0;
902 
903 	mutex_lock(&adev->gfx.gfx_off_mutex);
904 
905 	r = amdgpu_dpm_get_status_gfxoff(adev, value);
906 
907 	mutex_unlock(&adev->gfx.gfx_off_mutex);
908 
909 	return r;
910 }
911 
amdgpu_gfx_ras_late_init(struct amdgpu_device * adev,struct ras_common_if * ras_block)912 int amdgpu_gfx_ras_late_init(struct amdgpu_device *adev, struct ras_common_if *ras_block)
913 {
914 	int r;
915 
916 	if (amdgpu_ras_is_supported(adev, ras_block->block)) {
917 		if (!amdgpu_persistent_edc_harvesting_supported(adev)) {
918 			r = amdgpu_ras_reset_error_status(adev, AMDGPU_RAS_BLOCK__GFX);
919 			if (r)
920 				return r;
921 		}
922 
923 		r = amdgpu_ras_block_late_init(adev, ras_block);
924 		if (r)
925 			return r;
926 
927 		if (adev->gfx.cp_ecc_error_irq.funcs) {
928 			r = amdgpu_irq_get(adev, &adev->gfx.cp_ecc_error_irq, 0);
929 			if (r)
930 				goto late_fini;
931 		}
932 	} else {
933 		amdgpu_ras_feature_enable_on_boot(adev, ras_block, 0);
934 	}
935 
936 	return 0;
937 late_fini:
938 	amdgpu_ras_block_late_fini(adev, ras_block);
939 	return r;
940 }
941 
amdgpu_gfx_ras_sw_init(struct amdgpu_device * adev)942 int amdgpu_gfx_ras_sw_init(struct amdgpu_device *adev)
943 {
944 	int err = 0;
945 	struct amdgpu_gfx_ras *ras = NULL;
946 
947 	/* adev->gfx.ras is NULL, which means gfx does not
948 	 * support ras function, then do nothing here.
949 	 */
950 	if (!adev->gfx.ras)
951 		return 0;
952 
953 	ras = adev->gfx.ras;
954 
955 	err = amdgpu_ras_register_ras_block(adev, &ras->ras_block);
956 	if (err) {
957 		dev_err(adev->dev, "Failed to register gfx ras block!\n");
958 		return err;
959 	}
960 
961 	strlcpy(ras->ras_block.ras_comm.name, "gfx",
962 	    sizeof(ras->ras_block.ras_comm.name));
963 	ras->ras_block.ras_comm.block = AMDGPU_RAS_BLOCK__GFX;
964 	ras->ras_block.ras_comm.type = AMDGPU_RAS_ERROR__MULTI_UNCORRECTABLE;
965 	adev->gfx.ras_if = &ras->ras_block.ras_comm;
966 
967 	/* If not define special ras_late_init function, use gfx default ras_late_init */
968 	if (!ras->ras_block.ras_late_init)
969 		ras->ras_block.ras_late_init = amdgpu_gfx_ras_late_init;
970 
971 	/* If not defined special ras_cb function, use default ras_cb */
972 	if (!ras->ras_block.ras_cb)
973 		ras->ras_block.ras_cb = amdgpu_gfx_process_ras_data_cb;
974 
975 	return 0;
976 }
977 
amdgpu_gfx_poison_consumption_handler(struct amdgpu_device * adev,struct amdgpu_iv_entry * entry)978 int amdgpu_gfx_poison_consumption_handler(struct amdgpu_device *adev,
979 						struct amdgpu_iv_entry *entry)
980 {
981 	if (adev->gfx.ras && adev->gfx.ras->poison_consumption_handler)
982 		return adev->gfx.ras->poison_consumption_handler(adev, entry);
983 
984 	return 0;
985 }
986 
amdgpu_gfx_process_ras_data_cb(struct amdgpu_device * adev,void * err_data,struct amdgpu_iv_entry * entry)987 int amdgpu_gfx_process_ras_data_cb(struct amdgpu_device *adev,
988 		void *err_data,
989 		struct amdgpu_iv_entry *entry)
990 {
991 	/* TODO ue will trigger an interrupt.
992 	 *
993 	 * When “Full RAS” is enabled, the per-IP interrupt sources should
994 	 * be disabled and the driver should only look for the aggregated
995 	 * interrupt via sync flood
996 	 */
997 	if (!amdgpu_ras_is_supported(adev, AMDGPU_RAS_BLOCK__GFX)) {
998 		kgd2kfd_set_sram_ecc_flag(adev->kfd.dev);
999 		if (adev->gfx.ras && adev->gfx.ras->ras_block.hw_ops &&
1000 		    adev->gfx.ras->ras_block.hw_ops->query_ras_error_count)
1001 			adev->gfx.ras->ras_block.hw_ops->query_ras_error_count(adev, err_data);
1002 		amdgpu_ras_reset_gpu(adev);
1003 	}
1004 	return AMDGPU_RAS_SUCCESS;
1005 }
1006 
amdgpu_gfx_cp_ecc_error_irq(struct amdgpu_device * adev,struct amdgpu_irq_src * source,struct amdgpu_iv_entry * entry)1007 int amdgpu_gfx_cp_ecc_error_irq(struct amdgpu_device *adev,
1008 				  struct amdgpu_irq_src *source,
1009 				  struct amdgpu_iv_entry *entry)
1010 {
1011 	struct ras_common_if *ras_if = adev->gfx.ras_if;
1012 	struct ras_dispatch_if ih_data = {
1013 		.entry = entry,
1014 	};
1015 
1016 	if (!ras_if)
1017 		return 0;
1018 
1019 	ih_data.head = *ras_if;
1020 
1021 	DRM_ERROR("CP ECC ERROR IRQ\n");
1022 	amdgpu_ras_interrupt_dispatch(adev, &ih_data);
1023 	return 0;
1024 }
1025 
amdgpu_gfx_ras_error_func(struct amdgpu_device * adev,void * ras_error_status,void (* func)(struct amdgpu_device * adev,void * ras_error_status,int xcc_id))1026 void amdgpu_gfx_ras_error_func(struct amdgpu_device *adev,
1027 		void *ras_error_status,
1028 		void (*func)(struct amdgpu_device *adev, void *ras_error_status,
1029 				int xcc_id))
1030 {
1031 	int i;
1032 	int num_xcc = adev->gfx.xcc_mask ? NUM_XCC(adev->gfx.xcc_mask) : 1;
1033 	uint32_t xcc_mask = GENMASK(num_xcc - 1, 0);
1034 	struct ras_err_data *err_data = (struct ras_err_data *)ras_error_status;
1035 
1036 	if (err_data) {
1037 		err_data->ue_count = 0;
1038 		err_data->ce_count = 0;
1039 	}
1040 
1041 	for_each_inst(i, xcc_mask)
1042 		func(adev, ras_error_status, i);
1043 }
1044 
/**
 * amdgpu_kiq_rreg - read a register by submitting a request on a KIQ ring
 * @adev: amdgpu_device pointer
 * @reg: register offset passed to the ring's emit_rreg hook
 * @xcc_id: index into adev->gfx.kiq[] selecting the KIQ to use
 *
 * Returns 0 without touching the ring when amdgpu_device_skip_hw_access()
 * is true, and forwards to amdgpu_mes_rreg() when the first MES ring's
 * scheduler is ready.  Otherwise a writeback slot is reserved, the read is
 * emitted together with a polling fence, and once the fence is seen the
 * contents of the writeback slot are returned.
 *
 * On any failure an error is logged and ~0 is returned.
 */
uint32_t amdgpu_kiq_rreg(struct amdgpu_device *adev, uint32_t reg, uint32_t xcc_id)
{
	struct amdgpu_kiq *kiq = &adev->gfx.kiq[xcc_id];
	struct amdgpu_ring *kiq_ring = &kiq->ring;
	uint32_t fence_seq, wb_offs = 0, result = 0;
	signed long ret, retries = 0;
	unsigned long irq_flags;

	if (amdgpu_device_skip_hw_access(adev))
		return 0;

	if (adev->mes.ring[0].sched.ready)
		return amdgpu_mes_rreg(adev, reg);

	BUG_ON(!kiq_ring->funcs->emit_rreg);

	/* Ring space and the fence are set up under the KIQ ring lock. */
	spin_lock_irqsave(&kiq->ring_lock, irq_flags);
	if (amdgpu_device_wb_get(adev, &wb_offs)) {
		pr_err("critical bug! too many kiq readers\n");
		goto err_unlock;
	}
	ret = amdgpu_ring_alloc(kiq_ring, 32);
	if (ret)
		goto err_unlock;

	amdgpu_ring_emit_rreg(kiq_ring, reg, wb_offs);
	ret = amdgpu_fence_emit_polling(kiq_ring, &fence_seq, MAX_KIQ_REG_WAIT);
	if (ret)
		goto err_undo;

	amdgpu_ring_commit(kiq_ring);
	spin_unlock_irqrestore(&kiq->ring_lock, irq_flags);

	ret = amdgpu_fence_wait_polling(kiq_ring, fence_seq, MAX_KIQ_REG_WAIT);

	/* don't wait anymore for gpu reset case because this way may
	 * block gpu_recover() routine forever, e.g. this virt_kiq_rreg
	 * is triggered in TTM and ttm_bo_lock_delayed_workqueue() will
	 * never return if we keep waiting in virt_kiq_rreg, which cause
	 * gpu_recover() hang there.
	 *
	 * also don't wait anymore for IRQ context
	 */
	if (ret < 1 && (amdgpu_in_reset(adev) || in_interrupt()))
		goto err_report;

	might_sleep();
	/* Sleep-and-retry until the fence shows up or the retry budget is spent. */
	while (ret < 1 && retries++ < MAX_KIQ_REG_TRY) {
		drm_msleep(MAX_KIQ_REG_BAILOUT_INTERVAL);
		ret = amdgpu_fence_wait_polling(kiq_ring, fence_seq,
						MAX_KIQ_REG_WAIT);
	}

	/* The counter only exceeds the limit when the loop ran out of tries. */
	if (retries > MAX_KIQ_REG_TRY)
		goto err_report;

	mb();
	result = adev->wb.wb[wb_offs];
	amdgpu_device_wb_free(adev, wb_offs);
	return result;

err_undo:
	amdgpu_ring_undo(kiq_ring);
err_unlock:
	spin_unlock_irqrestore(&kiq->ring_lock, irq_flags);
err_report:
	/* NOTE(review): a reserved offset of 0 would be skipped here - confirm
	 * amdgpu_device_wb_get() never hands out offset 0 on this path.
	 */
	if (wb_offs)
		amdgpu_device_wb_free(adev, wb_offs);
	dev_err(adev->dev, "failed to read reg:%x\n", reg);
	return ~0;
}
1115 
/**
 * amdgpu_kiq_wreg - write a register by submitting a request on a KIQ ring
 * @adev: amdgpu_device pointer
 * @reg: register offset passed to the ring's emit_wreg hook
 * @v: value to write
 * @xcc_id: index into adev->gfx.kiq[] selecting the KIQ to use
 *
 * Does nothing when amdgpu_device_skip_hw_access() is true, and forwards to
 * amdgpu_mes_wreg() when the first MES ring's scheduler is ready.  Otherwise
 * the write is emitted together with a polling fence and the fence is
 * waited for.  Failures are logged; nothing is returned to the caller.
 */
void amdgpu_kiq_wreg(struct amdgpu_device *adev, uint32_t reg, uint32_t v, uint32_t xcc_id)
{
	struct amdgpu_kiq *kiq = &adev->gfx.kiq[xcc_id];
	struct amdgpu_ring *kiq_ring = &kiq->ring;
	signed long ret, retries = 0;
	unsigned long irq_flags;
	uint32_t fence_seq;

	BUG_ON(!kiq_ring->funcs->emit_wreg);

	if (amdgpu_device_skip_hw_access(adev))
		return;

	if (adev->mes.ring[0].sched.ready) {
		amdgpu_mes_wreg(adev, reg, v);
		return;
	}

	/* Ring space and the fence are set up under the KIQ ring lock. */
	spin_lock_irqsave(&kiq->ring_lock, irq_flags);
	ret = amdgpu_ring_alloc(kiq_ring, 32);
	if (ret)
		goto err_unlock;

	amdgpu_ring_emit_wreg(kiq_ring, reg, v);
	ret = amdgpu_fence_emit_polling(kiq_ring, &fence_seq, MAX_KIQ_REG_WAIT);
	if (ret)
		goto err_undo;

	amdgpu_ring_commit(kiq_ring);
	spin_unlock_irqrestore(&kiq->ring_lock, irq_flags);

	ret = amdgpu_fence_wait_polling(kiq_ring, fence_seq, MAX_KIQ_REG_WAIT);

	/* don't wait anymore for gpu reset case because this way may
	 * block gpu_recover() routine forever, e.g. this virt_kiq_rreg
	 * is triggered in TTM and ttm_bo_lock_delayed_workqueue() will
	 * never return if we keep waiting in virt_kiq_rreg, which cause
	 * gpu_recover() hang there.
	 *
	 * also don't wait anymore for IRQ context
	 */
	if (ret < 1 && (amdgpu_in_reset(adev) || in_interrupt()))
		goto err_report;

	might_sleep();
	/* Sleep-and-retry until the fence shows up or the retry budget is spent. */
	while (ret < 1 && retries++ < MAX_KIQ_REG_TRY) {
		drm_msleep(MAX_KIQ_REG_BAILOUT_INTERVAL);
		ret = amdgpu_fence_wait_polling(kiq_ring, fence_seq,
						MAX_KIQ_REG_WAIT);
	}

	/* The counter only exceeds the limit when the loop ran out of tries. */
	if (retries > MAX_KIQ_REG_TRY)
		goto err_report;

	return;

err_undo:
	amdgpu_ring_undo(kiq_ring);
err_unlock:
	spin_unlock_irqrestore(&kiq->ring_lock, irq_flags);
err_report:
	dev_err(adev->dev, "failed to write reg:%x\n", reg);
}
1179 
amdgpu_gfx_get_num_kcq(struct amdgpu_device * adev)1180 int amdgpu_gfx_get_num_kcq(struct amdgpu_device *adev)
1181 {
1182 	if (amdgpu_num_kcq == -1) {
1183 		return 8;
1184 	} else if (amdgpu_num_kcq > 8 || amdgpu_num_kcq < 0) {
1185 		dev_warn(adev->dev, "set kernel compute queue number to 8 due to invalid parameter provided by user\n");
1186 		return 8;
1187 	}
1188 	return amdgpu_num_kcq;
1189 }
1190 
/*
 * Record version information and PSP load bookkeeping for one CP ucode.
 *
 * @ucode_id selects which loaded firmware image (pfp/me/ce/mec/mec2) and
 * which header layout (v1.0 or v2.0) to read.  For the main ucode ids the
 * firmware and feature versions are copied from the header into adev->gfx;
 * the jump-table (_JT) and stack (_STACK) ids only contribute a size.
 * When the firmware load type is AMDGPU_FW_LOAD_PSP, the matching entry of
 * adev->firmware.ucode[] is filled in and the page-aligned size is added to
 * adev->firmware.fw_size.  Unknown ids are logged and ignored.
 */
void amdgpu_gfx_cp_init_microcode(struct amdgpu_device *adev,
				  uint32_t ucode_id)
{
	const struct gfx_firmware_header_v1_0 *hdr_v1;
	const struct gfx_firmware_header_v2_0 *hdr_v2;
	struct amdgpu_firmware_info *fw_info;
	const struct firmware *fw;
	unsigned int size;

	switch (ucode_id) {
	case AMDGPU_UCODE_ID_CP_PFP:
		fw = adev->gfx.pfp_fw;
		hdr_v1 = (const struct gfx_firmware_header_v1_0 *)fw->data;
		adev->gfx.pfp_fw_version =
			le32_to_cpu(hdr_v1->header.ucode_version);
		adev->gfx.pfp_feature_version =
			le32_to_cpu(hdr_v1->ucode_feature_version);
		size = le32_to_cpu(hdr_v1->header.ucode_size_bytes);
		break;
	case AMDGPU_UCODE_ID_CP_RS64_PFP:
		fw = adev->gfx.pfp_fw;
		hdr_v2 = (const struct gfx_firmware_header_v2_0 *)fw->data;
		adev->gfx.pfp_fw_version =
			le32_to_cpu(hdr_v2->header.ucode_version);
		adev->gfx.pfp_feature_version =
			le32_to_cpu(hdr_v2->ucode_feature_version);
		size = le32_to_cpu(hdr_v2->ucode_size_bytes);
		break;
	case AMDGPU_UCODE_ID_CP_RS64_PFP_P0_STACK:
	case AMDGPU_UCODE_ID_CP_RS64_PFP_P1_STACK:
		fw = adev->gfx.pfp_fw;
		hdr_v2 = (const struct gfx_firmware_header_v2_0 *)fw->data;
		size = le32_to_cpu(hdr_v2->data_size_bytes);
		break;
	case AMDGPU_UCODE_ID_CP_ME:
		fw = adev->gfx.me_fw;
		hdr_v1 = (const struct gfx_firmware_header_v1_0 *)fw->data;
		adev->gfx.me_fw_version =
			le32_to_cpu(hdr_v1->header.ucode_version);
		adev->gfx.me_feature_version =
			le32_to_cpu(hdr_v1->ucode_feature_version);
		size = le32_to_cpu(hdr_v1->header.ucode_size_bytes);
		break;
	case AMDGPU_UCODE_ID_CP_RS64_ME:
		fw = adev->gfx.me_fw;
		hdr_v2 = (const struct gfx_firmware_header_v2_0 *)fw->data;
		adev->gfx.me_fw_version =
			le32_to_cpu(hdr_v2->header.ucode_version);
		adev->gfx.me_feature_version =
			le32_to_cpu(hdr_v2->ucode_feature_version);
		size = le32_to_cpu(hdr_v2->ucode_size_bytes);
		break;
	case AMDGPU_UCODE_ID_CP_RS64_ME_P0_STACK:
	case AMDGPU_UCODE_ID_CP_RS64_ME_P1_STACK:
		fw = adev->gfx.me_fw;
		hdr_v2 = (const struct gfx_firmware_header_v2_0 *)fw->data;
		size = le32_to_cpu(hdr_v2->data_size_bytes);
		break;
	case AMDGPU_UCODE_ID_CP_CE:
		fw = adev->gfx.ce_fw;
		hdr_v1 = (const struct gfx_firmware_header_v1_0 *)fw->data;
		adev->gfx.ce_fw_version =
			le32_to_cpu(hdr_v1->header.ucode_version);
		adev->gfx.ce_feature_version =
			le32_to_cpu(hdr_v1->ucode_feature_version);
		size = le32_to_cpu(hdr_v1->header.ucode_size_bytes);
		break;
	case AMDGPU_UCODE_ID_CP_MEC1:
		fw = adev->gfx.mec_fw;
		hdr_v1 = (const struct gfx_firmware_header_v1_0 *)fw->data;
		adev->gfx.mec_fw_version =
			le32_to_cpu(hdr_v1->header.ucode_version);
		adev->gfx.mec_feature_version =
			le32_to_cpu(hdr_v1->ucode_feature_version);
		/* The jump table is accounted for separately (MEC1_JT). */
		size = le32_to_cpu(hdr_v1->header.ucode_size_bytes) -
		       le32_to_cpu(hdr_v1->jt_size) * 4;
		break;
	case AMDGPU_UCODE_ID_CP_MEC1_JT:
		fw = adev->gfx.mec_fw;
		hdr_v1 = (const struct gfx_firmware_header_v1_0 *)fw->data;
		size = le32_to_cpu(hdr_v1->jt_size) * 4;
		break;
	case AMDGPU_UCODE_ID_CP_MEC2:
		fw = adev->gfx.mec2_fw;
		hdr_v1 = (const struct gfx_firmware_header_v1_0 *)fw->data;
		adev->gfx.mec2_fw_version =
			le32_to_cpu(hdr_v1->header.ucode_version);
		adev->gfx.mec2_feature_version =
			le32_to_cpu(hdr_v1->ucode_feature_version);
		/* The jump table is accounted for separately (MEC2_JT). */
		size = le32_to_cpu(hdr_v1->header.ucode_size_bytes) -
		       le32_to_cpu(hdr_v1->jt_size) * 4;
		break;
	case AMDGPU_UCODE_ID_CP_MEC2_JT:
		fw = adev->gfx.mec2_fw;
		hdr_v1 = (const struct gfx_firmware_header_v1_0 *)fw->data;
		size = le32_to_cpu(hdr_v1->jt_size) * 4;
		break;
	case AMDGPU_UCODE_ID_CP_RS64_MEC:
		fw = adev->gfx.mec_fw;
		hdr_v2 = (const struct gfx_firmware_header_v2_0 *)fw->data;
		adev->gfx.mec_fw_version =
			le32_to_cpu(hdr_v2->header.ucode_version);
		adev->gfx.mec_feature_version =
			le32_to_cpu(hdr_v2->ucode_feature_version);
		size = le32_to_cpu(hdr_v2->ucode_size_bytes);
		break;
	case AMDGPU_UCODE_ID_CP_RS64_MEC_P0_STACK:
	case AMDGPU_UCODE_ID_CP_RS64_MEC_P1_STACK:
	case AMDGPU_UCODE_ID_CP_RS64_MEC_P2_STACK:
	case AMDGPU_UCODE_ID_CP_RS64_MEC_P3_STACK:
		fw = adev->gfx.mec_fw;
		hdr_v2 = (const struct gfx_firmware_header_v2_0 *)fw->data;
		size = le32_to_cpu(hdr_v2->data_size_bytes);
		break;
	default:
		dev_err(adev->dev, "Invalid ucode id %u\n", ucode_id);
		return;
	}

	/* The ucode table is only populated for PSP firmware loading. */
	if (adev->firmware.load_type != AMDGPU_FW_LOAD_PSP)
		return;

	fw_info = &adev->firmware.ucode[ucode_id];
	fw_info->ucode_id = ucode_id;
	fw_info->fw = fw;
	adev->firmware.fw_size += ALIGN(size, PAGE_SIZE);
}
1330 
amdgpu_gfx_is_master_xcc(struct amdgpu_device * adev,int xcc_id)1331 bool amdgpu_gfx_is_master_xcc(struct amdgpu_device *adev, int xcc_id)
1332 {
1333 	return !(xcc_id % (adev->gfx.num_xcc_per_xcp ?
1334 			adev->gfx.num_xcc_per_xcp : 1));
1335 }
1336 
amdgpu_gfx_get_current_compute_partition(struct device * dev,struct device_attribute * addr,char * buf)1337 static ssize_t amdgpu_gfx_get_current_compute_partition(struct device *dev,
1338 						struct device_attribute *addr,
1339 						char *buf)
1340 {
1341 	struct drm_device *ddev = dev_get_drvdata(dev);
1342 	struct amdgpu_device *adev = drm_to_adev(ddev);
1343 	int mode;
1344 
1345 	mode = amdgpu_xcp_query_partition_mode(adev->xcp_mgr,
1346 					       AMDGPU_XCP_FL_NONE);
1347 
1348 	return sysfs_emit(buf, "%s\n", amdgpu_gfx_compute_mode_desc(mode));
1349 }
1350 
amdgpu_gfx_set_compute_partition(struct device * dev,struct device_attribute * addr,const char * buf,size_t count)1351 static ssize_t amdgpu_gfx_set_compute_partition(struct device *dev,
1352 						struct device_attribute *addr,
1353 						const char *buf, size_t count)
1354 {
1355 	struct drm_device *ddev = dev_get_drvdata(dev);
1356 	struct amdgpu_device *adev = drm_to_adev(ddev);
1357 	enum amdgpu_gfx_partition mode;
1358 	int ret = 0, num_xcc;
1359 
1360 	num_xcc = NUM_XCC(adev->gfx.xcc_mask);
1361 	if (num_xcc % 2 != 0)
1362 		return -EINVAL;
1363 
1364 	if (!strncasecmp("SPX", buf, strlen("SPX"))) {
1365 		mode = AMDGPU_SPX_PARTITION_MODE;
1366 	} else if (!strncasecmp("DPX", buf, strlen("DPX"))) {
1367 		/*
1368 		 * DPX mode needs AIDs to be in multiple of 2.
1369 		 * Each AID connects 2 XCCs.
1370 		 */
1371 		if (num_xcc%4)
1372 			return -EINVAL;
1373 		mode = AMDGPU_DPX_PARTITION_MODE;
1374 	} else if (!strncasecmp("TPX", buf, strlen("TPX"))) {
1375 		if (num_xcc != 6)
1376 			return -EINVAL;
1377 		mode = AMDGPU_TPX_PARTITION_MODE;
1378 	} else if (!strncasecmp("QPX", buf, strlen("QPX"))) {
1379 		if (num_xcc != 8)
1380 			return -EINVAL;
1381 		mode = AMDGPU_QPX_PARTITION_MODE;
1382 	} else if (!strncasecmp("CPX", buf, strlen("CPX"))) {
1383 		mode = AMDGPU_CPX_PARTITION_MODE;
1384 	} else {
1385 		return -EINVAL;
1386 	}
1387 
1388 	ret = amdgpu_xcp_switch_partition_mode(adev->xcp_mgr, mode);
1389 
1390 	if (ret)
1391 		return ret;
1392 
1393 	return count;
1394 }
1395 
amdgpu_gfx_get_available_compute_partition(struct device * dev,struct device_attribute * addr,char * buf)1396 static ssize_t amdgpu_gfx_get_available_compute_partition(struct device *dev,
1397 						struct device_attribute *addr,
1398 						char *buf)
1399 {
1400 	struct drm_device *ddev = dev_get_drvdata(dev);
1401 	struct amdgpu_device *adev = drm_to_adev(ddev);
1402 	char *supported_partition;
1403 
1404 	/* TBD */
1405 	switch (NUM_XCC(adev->gfx.xcc_mask)) {
1406 	case 8:
1407 		supported_partition = "SPX, DPX, QPX, CPX";
1408 		break;
1409 	case 6:
1410 		supported_partition = "SPX, TPX, CPX";
1411 		break;
1412 	case 4:
1413 		supported_partition = "SPX, DPX, CPX";
1414 		break;
1415 	/* this seems only existing in emulation phase */
1416 	case 2:
1417 		supported_partition = "SPX, CPX";
1418 		break;
1419 	default:
1420 		supported_partition = "Not supported";
1421 		break;
1422 	}
1423 
1424 	return sysfs_emit(buf, "%s\n", supported_partition);
1425 }
1426 
amdgpu_gfx_run_cleaner_shader_job(struct amdgpu_ring * ring)1427 static int amdgpu_gfx_run_cleaner_shader_job(struct amdgpu_ring *ring)
1428 {
1429 	struct amdgpu_device *adev = ring->adev;
1430 	struct drm_gpu_scheduler *sched = &ring->sched;
1431 	struct drm_sched_entity entity;
1432 	struct dma_fence *f;
1433 	struct amdgpu_job *job;
1434 	struct amdgpu_ib *ib;
1435 	int i, r;
1436 
1437 	/* Initialize the scheduler entity */
1438 	r = drm_sched_entity_init(&entity, DRM_SCHED_PRIORITY_NORMAL,
1439 				  &sched, 1, NULL);
1440 	if (r) {
1441 		dev_err(adev->dev, "Failed setting up GFX kernel entity.\n");
1442 		goto err;
1443 	}
1444 
1445 	r = amdgpu_job_alloc_with_ib(ring->adev, &entity, NULL,
1446 				     64, 0,
1447 				     &job);
1448 	if (r)
1449 		goto err;
1450 
1451 	job->enforce_isolation = true;
1452 
1453 	ib = &job->ibs[0];
1454 	for (i = 0; i <= ring->funcs->align_mask; ++i)
1455 		ib->ptr[i] = ring->funcs->nop;
1456 	ib->length_dw = ring->funcs->align_mask + 1;
1457 
1458 	f = amdgpu_job_submit(job);
1459 
1460 	r = dma_fence_wait(f, false);
1461 	if (r)
1462 		goto err;
1463 
1464 	dma_fence_put(f);
1465 
1466 	/* Clean up the scheduler entity */
1467 	drm_sched_entity_destroy(&entity);
1468 	return 0;
1469 
1470 err:
1471 	return r;
1472 }
1473 
amdgpu_gfx_run_cleaner_shader(struct amdgpu_device * adev,int xcp_id)1474 static int amdgpu_gfx_run_cleaner_shader(struct amdgpu_device *adev, int xcp_id)
1475 {
1476 	int num_xcc = NUM_XCC(adev->gfx.xcc_mask);
1477 	struct amdgpu_ring *ring;
1478 	int num_xcc_to_clear;
1479 	int i, r, xcc_id;
1480 
1481 	if (adev->gfx.num_xcc_per_xcp)
1482 		num_xcc_to_clear = adev->gfx.num_xcc_per_xcp;
1483 	else
1484 		num_xcc_to_clear = 1;
1485 
1486 	for (xcc_id = 0; xcc_id < num_xcc; xcc_id++) {
1487 		for (i = 0; i < adev->gfx.num_compute_rings; i++) {
1488 			ring = &adev->gfx.compute_ring[i + xcc_id * adev->gfx.num_compute_rings];
1489 			if ((ring->xcp_id == xcp_id) && ring->sched.ready) {
1490 				r = amdgpu_gfx_run_cleaner_shader_job(ring);
1491 				if (r)
1492 					return r;
1493 				num_xcc_to_clear--;
1494 				break;
1495 			}
1496 		}
1497 	}
1498 
1499 	if (num_xcc_to_clear)
1500 		return -ENOENT;
1501 
1502 	return 0;
1503 }
1504 
amdgpu_gfx_set_run_cleaner_shader(struct device * dev,struct device_attribute * attr,const char * buf,size_t count)1505 static ssize_t amdgpu_gfx_set_run_cleaner_shader(struct device *dev,
1506 						 struct device_attribute *attr,
1507 						 const char *buf,
1508 						 size_t count)
1509 {
1510 	STUB();
1511 	return -ENOSYS;
1512 #ifdef notyet
1513 	struct drm_device *ddev = dev_get_drvdata(dev);
1514 	struct amdgpu_device *adev = drm_to_adev(ddev);
1515 	int ret;
1516 	long value;
1517 
1518 	if (amdgpu_in_reset(adev))
1519 		return -EPERM;
1520 	if (adev->in_suspend && !adev->in_runpm)
1521 		return -EPERM;
1522 
1523 	ret = kstrtol(buf, 0, &value);
1524 
1525 	if (ret)
1526 		return -EINVAL;
1527 
1528 	if (value < 0)
1529 		return -EINVAL;
1530 
1531 	if (adev->xcp_mgr) {
1532 		if (value >= adev->xcp_mgr->num_xcps)
1533 			return -EINVAL;
1534 	} else {
1535 		if (value > 1)
1536 			return -EINVAL;
1537 	}
1538 
1539 	ret = pm_runtime_get_sync(ddev->dev);
1540 	if (ret < 0) {
1541 		pm_runtime_put_autosuspend(ddev->dev);
1542 		return ret;
1543 	}
1544 
1545 	ret = amdgpu_gfx_run_cleaner_shader(adev, value);
1546 
1547 	pm_runtime_mark_last_busy(ddev->dev);
1548 	pm_runtime_put_autosuspend(ddev->dev);
1549 
1550 	if (ret)
1551 		return ret;
1552 
1553 	return count;
1554 #endif
1555 }
1556 
amdgpu_gfx_get_enforce_isolation(struct device * dev,struct device_attribute * attr,char * buf)1557 static ssize_t amdgpu_gfx_get_enforce_isolation(struct device *dev,
1558 						struct device_attribute *attr,
1559 						char *buf)
1560 {
1561 	struct drm_device *ddev = dev_get_drvdata(dev);
1562 	struct amdgpu_device *adev = drm_to_adev(ddev);
1563 	int i;
1564 	ssize_t size = 0;
1565 
1566 	if (adev->xcp_mgr) {
1567 		for (i = 0; i < adev->xcp_mgr->num_xcps; i++) {
1568 			size += sysfs_emit_at(buf, size, "%u", adev->enforce_isolation[i]);
1569 			if (i < (adev->xcp_mgr->num_xcps - 1))
1570 				size += sysfs_emit_at(buf, size, " ");
1571 		}
1572 		buf[size++] = '\n';
1573 	} else {
1574 		size = sysfs_emit_at(buf, 0, "%u\n", adev->enforce_isolation[0]);
1575 	}
1576 
1577 	return size;
1578 }
1579 
amdgpu_gfx_set_enforce_isolation(struct device * dev,struct device_attribute * attr,const char * buf,size_t count)1580 static ssize_t amdgpu_gfx_set_enforce_isolation(struct device *dev,
1581 						struct device_attribute *attr,
1582 						const char *buf, size_t count)
1583 {
1584 	STUB();
1585 	return -ENOSYS;
1586 #ifdef notyet
1587 	struct drm_device *ddev = dev_get_drvdata(dev);
1588 	struct amdgpu_device *adev = drm_to_adev(ddev);
1589 	long partition_values[MAX_XCP] = {0};
1590 	int ret, i, num_partitions;
1591 	const char *input_buf = buf;
1592 
1593 	for (i = 0; i < (adev->xcp_mgr ? adev->xcp_mgr->num_xcps : 1); i++) {
1594 		ret = sscanf(input_buf, "%ld", &partition_values[i]);
1595 		if (ret <= 0)
1596 			break;
1597 
1598 		/* Move the pointer to the next value in the string */
1599 		input_buf = strchr(input_buf, ' ');
1600 		if (input_buf) {
1601 			input_buf++;
1602 		} else {
1603 			i++;
1604 			break;
1605 		}
1606 	}
1607 	num_partitions = i;
1608 
1609 	if (adev->xcp_mgr && num_partitions != adev->xcp_mgr->num_xcps)
1610 		return -EINVAL;
1611 
1612 	if (!adev->xcp_mgr && num_partitions != 1)
1613 		return -EINVAL;
1614 
1615 	for (i = 0; i < num_partitions; i++) {
1616 		if (partition_values[i] != 0 && partition_values[i] != 1)
1617 			return -EINVAL;
1618 	}
1619 
1620 	mutex_lock(&adev->enforce_isolation_mutex);
1621 
1622 	for (i = 0; i < num_partitions; i++) {
1623 		if (adev->enforce_isolation[i] && !partition_values[i]) {
1624 			/* Going from enabled to disabled */
1625 			amdgpu_vmid_free_reserved(adev, AMDGPU_GFXHUB(i));
1626 		} else if (!adev->enforce_isolation[i] && partition_values[i]) {
1627 			/* Going from disabled to enabled */
1628 			amdgpu_vmid_alloc_reserved(adev, AMDGPU_GFXHUB(i));
1629 		}
1630 		adev->enforce_isolation[i] = partition_values[i];
1631 	}
1632 
1633 	mutex_unlock(&adev->enforce_isolation_mutex);
1634 
1635 	return count;
1636 #endif
1637 }
1638 
1639 static DEVICE_ATTR(run_cleaner_shader, 0200,
1640 		   NULL, amdgpu_gfx_set_run_cleaner_shader);
1641 
1642 static DEVICE_ATTR(enforce_isolation, 0644,
1643 		   amdgpu_gfx_get_enforce_isolation,
1644 		   amdgpu_gfx_set_enforce_isolation);
1645 
1646 static DEVICE_ATTR(current_compute_partition, 0644,
1647 		   amdgpu_gfx_get_current_compute_partition,
1648 		   amdgpu_gfx_set_compute_partition);
1649 
1650 static DEVICE_ATTR(available_compute_partition, 0444,
1651 		   amdgpu_gfx_get_available_compute_partition, NULL);
1652 
amdgpu_gfx_sysfs_init(struct amdgpu_device * adev)1653 int amdgpu_gfx_sysfs_init(struct amdgpu_device *adev)
1654 {
1655 	int r;
1656 
1657 	r = device_create_file(adev->dev, &dev_attr_current_compute_partition);
1658 	if (r)
1659 		return r;
1660 
1661 	r = device_create_file(adev->dev, &dev_attr_available_compute_partition);
1662 
1663 	return r;
1664 }
1665 
amdgpu_gfx_sysfs_fini(struct amdgpu_device * adev)1666 void amdgpu_gfx_sysfs_fini(struct amdgpu_device *adev)
1667 {
1668 	device_remove_file(adev->dev, &dev_attr_current_compute_partition);
1669 	device_remove_file(adev->dev, &dev_attr_available_compute_partition);
1670 }
1671 
amdgpu_gfx_sysfs_isolation_shader_init(struct amdgpu_device * adev)1672 int amdgpu_gfx_sysfs_isolation_shader_init(struct amdgpu_device *adev)
1673 {
1674 	int r;
1675 
1676 	r = device_create_file(adev->dev, &dev_attr_enforce_isolation);
1677 	if (r)
1678 		return r;
1679 
1680 	r = device_create_file(adev->dev, &dev_attr_run_cleaner_shader);
1681 	if (r)
1682 		return r;
1683 
1684 	return 0;
1685 }
1686 
amdgpu_gfx_sysfs_isolation_shader_fini(struct amdgpu_device * adev)1687 void amdgpu_gfx_sysfs_isolation_shader_fini(struct amdgpu_device *adev)
1688 {
1689 	device_remove_file(adev->dev, &dev_attr_enforce_isolation);
1690 	device_remove_file(adev->dev, &dev_attr_run_cleaner_shader);
1691 }
1692 
amdgpu_gfx_cleaner_shader_sw_init(struct amdgpu_device * adev,unsigned int cleaner_shader_size)1693 int amdgpu_gfx_cleaner_shader_sw_init(struct amdgpu_device *adev,
1694 				      unsigned int cleaner_shader_size)
1695 {
1696 	if (!adev->gfx.enable_cleaner_shader)
1697 		return -EOPNOTSUPP;
1698 
1699 	return amdgpu_bo_create_kernel(adev, cleaner_shader_size, PAGE_SIZE,
1700 				       AMDGPU_GEM_DOMAIN_VRAM | AMDGPU_GEM_DOMAIN_GTT,
1701 				       &adev->gfx.cleaner_shader_obj,
1702 				       &adev->gfx.cleaner_shader_gpu_addr,
1703 				       (void **)&adev->gfx.cleaner_shader_cpu_ptr);
1704 }
1705 
amdgpu_gfx_cleaner_shader_sw_fini(struct amdgpu_device * adev)1706 void amdgpu_gfx_cleaner_shader_sw_fini(struct amdgpu_device *adev)
1707 {
1708 	if (!adev->gfx.enable_cleaner_shader)
1709 		return;
1710 
1711 	amdgpu_bo_free_kernel(&adev->gfx.cleaner_shader_obj,
1712 			      &adev->gfx.cleaner_shader_gpu_addr,
1713 			      (void **)&adev->gfx.cleaner_shader_cpu_ptr);
1714 }
1715 
amdgpu_gfx_cleaner_shader_init(struct amdgpu_device * adev,unsigned int cleaner_shader_size,const void * cleaner_shader_ptr)1716 void amdgpu_gfx_cleaner_shader_init(struct amdgpu_device *adev,
1717 				    unsigned int cleaner_shader_size,
1718 				    const void *cleaner_shader_ptr)
1719 {
1720 	if (!adev->gfx.enable_cleaner_shader)
1721 		return;
1722 
1723 	if (adev->gfx.cleaner_shader_cpu_ptr && cleaner_shader_ptr)
1724 		memcpy_toio(adev->gfx.cleaner_shader_cpu_ptr, cleaner_shader_ptr,
1725 			    cleaner_shader_size);
1726 }
1727 
1728 /**
1729  * amdgpu_gfx_kfd_sch_ctrl - Control the KFD scheduler from the KGD (Graphics Driver)
1730  * @adev: amdgpu_device pointer
1731  * @idx: Index of the scheduler to control
1732  * @enable: Whether to enable or disable the KFD scheduler
1733  *
1734  * This function is used to control the KFD (Kernel Fusion Driver) scheduler
1735  * from the KGD. It is part of the cleaner shader feature. This function plays
1736  * a key role in enforcing process isolation on the GPU.
1737  *
1738  * The function uses a reference count mechanism (kfd_sch_req_count) to keep
1739  * track of the number of requests to enable the KFD scheduler. When a request
1740  * to enable the KFD scheduler is made, the reference count is decremented.
1741  * When the reference count reaches zero, a delayed work is scheduled to
1742  * enforce isolation after a delay of GFX_SLICE_PERIOD.
1743  *
1744  * When a request to disable the KFD scheduler is made, the function first
1745  * checks if the reference count is zero. If it is, it cancels the delayed work
1746  * for enforcing isolation and checks if the KFD scheduler is active. If the
1747  * KFD scheduler is active, it sends a request to stop the KFD scheduler and
1748  * sets the KFD scheduler state to inactive. Then, it increments the reference
1749  * count.
1750  *
1751  * The function is synchronized using the kfd_sch_mutex to ensure that the KFD
1752  * scheduler state and reference count are updated atomically.
1753  *
1754  * Note: If the reference count is already zero when a request to enable the
1755  * KFD scheduler is made, it means there's an imbalance bug somewhere. The
1756  * function triggers a warning in this case.
1757  */
amdgpu_gfx_kfd_sch_ctrl(struct amdgpu_device * adev,u32 idx,bool enable)1758 static void amdgpu_gfx_kfd_sch_ctrl(struct amdgpu_device *adev, u32 idx,
1759 				    bool enable)
1760 {
1761 	mutex_lock(&adev->gfx.kfd_sch_mutex);
1762 
1763 	if (enable) {
1764 		/* If the count is already 0, it means there's an imbalance bug somewhere.
1765 		 * Note that the bug may be in a different caller than the one which triggers the
1766 		 * WARN_ON_ONCE.
1767 		 */
1768 		if (WARN_ON_ONCE(adev->gfx.kfd_sch_req_count[idx] == 0)) {
1769 			dev_err(adev->dev, "Attempted to enable KFD scheduler when reference count is already zero\n");
1770 			goto unlock;
1771 		}
1772 
1773 		adev->gfx.kfd_sch_req_count[idx]--;
1774 
1775 		if (adev->gfx.kfd_sch_req_count[idx] == 0 &&
1776 		    adev->gfx.kfd_sch_inactive[idx]) {
1777 			schedule_delayed_work(&adev->gfx.enforce_isolation[idx].work,
1778 					      GFX_SLICE_PERIOD);
1779 		}
1780 	} else {
1781 		if (adev->gfx.kfd_sch_req_count[idx] == 0) {
1782 			cancel_delayed_work_sync(&adev->gfx.enforce_isolation[idx].work);
1783 			if (!adev->gfx.kfd_sch_inactive[idx]) {
1784 				amdgpu_amdkfd_stop_sched(adev, idx);
1785 				adev->gfx.kfd_sch_inactive[idx] = true;
1786 			}
1787 		}
1788 
1789 		adev->gfx.kfd_sch_req_count[idx]++;
1790 	}
1791 
1792 unlock:
1793 	mutex_unlock(&adev->gfx.kfd_sch_mutex);
1794 }
1795 
1796 /**
1797  * amdgpu_gfx_enforce_isolation_handler - work handler for enforcing shader isolation
1798  *
1799  * @work: work_struct.
1800  *
1801  * This function is the work handler for enforcing shader isolation on AMD GPUs.
1802  * It counts the number of emitted fences for each GFX and compute ring. If there
1803  * are any fences, it schedules the `enforce_isolation_work` to be run after a
1804  * delay of `GFX_SLICE_PERIOD`. If there are no fences, it signals the Kernel Fusion
1805  * Driver (KFD) to resume the runqueue. The function is synchronized using the
1806  * `enforce_isolation_mutex`.
1807  */
amdgpu_gfx_enforce_isolation_handler(struct work_struct * work)1808 void amdgpu_gfx_enforce_isolation_handler(struct work_struct *work)
1809 {
1810 	struct amdgpu_isolation_work *isolation_work =
1811 		container_of(work, struct amdgpu_isolation_work, work.work);
1812 	struct amdgpu_device *adev = isolation_work->adev;
1813 	u32 i, idx, fences = 0;
1814 
1815 	if (isolation_work->xcp_id == AMDGPU_XCP_NO_PARTITION)
1816 		idx = 0;
1817 	else
1818 		idx = isolation_work->xcp_id;
1819 
1820 	if (idx >= MAX_XCP)
1821 		return;
1822 
1823 	mutex_lock(&adev->enforce_isolation_mutex);
1824 	for (i = 0; i < AMDGPU_MAX_GFX_RINGS; ++i) {
1825 		if (isolation_work->xcp_id == adev->gfx.gfx_ring[i].xcp_id)
1826 			fences += amdgpu_fence_count_emitted(&adev->gfx.gfx_ring[i]);
1827 	}
1828 	for (i = 0; i < (AMDGPU_MAX_COMPUTE_RINGS * AMDGPU_MAX_GC_INSTANCES); ++i) {
1829 		if (isolation_work->xcp_id == adev->gfx.compute_ring[i].xcp_id)
1830 			fences += amdgpu_fence_count_emitted(&adev->gfx.compute_ring[i]);
1831 	}
1832 	if (fences) {
1833 		schedule_delayed_work(&adev->gfx.enforce_isolation[idx].work,
1834 				      GFX_SLICE_PERIOD);
1835 	} else {
1836 		/* Tell KFD to resume the runqueue */
1837 		if (adev->kfd.init_complete) {
1838 			WARN_ON_ONCE(!adev->gfx.kfd_sch_inactive[idx]);
1839 			WARN_ON_ONCE(adev->gfx.kfd_sch_req_count[idx]);
1840 				amdgpu_amdkfd_start_sched(adev, idx);
1841 				adev->gfx.kfd_sch_inactive[idx] = false;
1842 		}
1843 	}
1844 	mutex_unlock(&adev->enforce_isolation_mutex);
1845 }
1846 
amdgpu_gfx_enforce_isolation_ring_begin_use(struct amdgpu_ring * ring)1847 void amdgpu_gfx_enforce_isolation_ring_begin_use(struct amdgpu_ring *ring)
1848 {
1849 	struct amdgpu_device *adev = ring->adev;
1850 	u32 idx;
1851 
1852 	if (!adev->gfx.enable_cleaner_shader)
1853 		return;
1854 
1855 	if (ring->xcp_id == AMDGPU_XCP_NO_PARTITION)
1856 		idx = 0;
1857 	else
1858 		idx = ring->xcp_id;
1859 
1860 	if (idx >= MAX_XCP)
1861 		return;
1862 
1863 	mutex_lock(&adev->enforce_isolation_mutex);
1864 	if (adev->enforce_isolation[idx]) {
1865 		if (adev->kfd.init_complete)
1866 			amdgpu_gfx_kfd_sch_ctrl(adev, idx, false);
1867 	}
1868 	mutex_unlock(&adev->enforce_isolation_mutex);
1869 }
1870 
amdgpu_gfx_enforce_isolation_ring_end_use(struct amdgpu_ring * ring)1871 void amdgpu_gfx_enforce_isolation_ring_end_use(struct amdgpu_ring *ring)
1872 {
1873 	struct amdgpu_device *adev = ring->adev;
1874 	u32 idx;
1875 
1876 	if (!adev->gfx.enable_cleaner_shader)
1877 		return;
1878 
1879 	if (ring->xcp_id == AMDGPU_XCP_NO_PARTITION)
1880 		idx = 0;
1881 	else
1882 		idx = ring->xcp_id;
1883 
1884 	if (idx >= MAX_XCP)
1885 		return;
1886 
1887 	mutex_lock(&adev->enforce_isolation_mutex);
1888 	if (adev->enforce_isolation[idx]) {
1889 		if (adev->kfd.init_complete)
1890 			amdgpu_gfx_kfd_sch_ctrl(adev, idx, true);
1891 	}
1892 	mutex_unlock(&adev->enforce_isolation_mutex);
1893 }
1894