1 /*
2 * Copyright 2014 Advanced Micro Devices, Inc.
3 * Copyright 2008 Red Hat Inc.
4 * Copyright 2009 Jerome Glisse.
5 *
6 * Permission is hereby granted, free of charge, to any person obtaining a
7 * copy of this software and associated documentation files (the "Software"),
8 * to deal in the Software without restriction, including without limitation
9 * the rights to use, copy, modify, merge, publish, distribute, sublicense,
10 * and/or sell copies of the Software, and to permit persons to whom the
11 * Software is furnished to do so, subject to the following conditions:
12 *
13 * The above copyright notice and this permission notice shall be included in
14 * all copies or substantial portions of the Software.
15 *
16 * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
17 * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
18 * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL
19 * THE COPYRIGHT HOLDER(S) OR AUTHOR(S) BE LIABLE FOR ANY CLAIM, DAMAGES OR
20 * OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE,
21 * ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR
22 * OTHER DEALINGS IN THE SOFTWARE.
23 *
24 */
25
26 #include <linux/firmware.h>
27 #include <linux/pm_runtime.h>
28
29 #include "amdgpu.h"
30 #include "amdgpu_gfx.h"
31 #include "amdgpu_rlc.h"
32 #include "amdgpu_ras.h"
33 #include "amdgpu_reset.h"
34 #include "amdgpu_xcp.h"
35 #include "amdgpu_xgmi.h"
36
/* Delay of 0.1 second (100 ms converted to jiffies) before the gfx off feature is enabled. */
#define GFX_OFF_DELAY_ENABLE msecs_to_jiffies(100)

/* Zero delay. NOTE(review): no user of this macro is visible in this part of the file. */
#define GFX_OFF_NO_DELAY 0
41
/*
 * GPU GFX IP block helper functions.
 */
45
amdgpu_gfx_mec_queue_to_bit(struct amdgpu_device * adev,int mec,int pipe,int queue)46 int amdgpu_gfx_mec_queue_to_bit(struct amdgpu_device *adev, int mec,
47 int pipe, int queue)
48 {
49 int bit = 0;
50
51 bit += mec * adev->gfx.mec.num_pipe_per_mec
52 * adev->gfx.mec.num_queue_per_pipe;
53 bit += pipe * adev->gfx.mec.num_queue_per_pipe;
54 bit += queue;
55
56 return bit;
57 }
58
amdgpu_queue_mask_bit_to_mec_queue(struct amdgpu_device * adev,int bit,int * mec,int * pipe,int * queue)59 void amdgpu_queue_mask_bit_to_mec_queue(struct amdgpu_device *adev, int bit,
60 int *mec, int *pipe, int *queue)
61 {
62 *queue = bit % adev->gfx.mec.num_queue_per_pipe;
63 *pipe = (bit / adev->gfx.mec.num_queue_per_pipe)
64 % adev->gfx.mec.num_pipe_per_mec;
65 *mec = (bit / adev->gfx.mec.num_queue_per_pipe)
66 / adev->gfx.mec.num_pipe_per_mec;
67
68 }
69
amdgpu_gfx_is_mec_queue_enabled(struct amdgpu_device * adev,int xcc_id,int mec,int pipe,int queue)70 bool amdgpu_gfx_is_mec_queue_enabled(struct amdgpu_device *adev,
71 int xcc_id, int mec, int pipe, int queue)
72 {
73 return test_bit(amdgpu_gfx_mec_queue_to_bit(adev, mec, pipe, queue),
74 adev->gfx.mec_bitmap[xcc_id].queue_bitmap);
75 }
76
amdgpu_gfx_me_queue_to_bit(struct amdgpu_device * adev,int me,int pipe,int queue)77 int amdgpu_gfx_me_queue_to_bit(struct amdgpu_device *adev,
78 int me, int pipe, int queue)
79 {
80 int bit = 0;
81
82 bit += me * adev->gfx.me.num_pipe_per_me
83 * adev->gfx.me.num_queue_per_pipe;
84 bit += pipe * adev->gfx.me.num_queue_per_pipe;
85 bit += queue;
86
87 return bit;
88 }
89
amdgpu_gfx_bit_to_me_queue(struct amdgpu_device * adev,int bit,int * me,int * pipe,int * queue)90 void amdgpu_gfx_bit_to_me_queue(struct amdgpu_device *adev, int bit,
91 int *me, int *pipe, int *queue)
92 {
93 *queue = bit % adev->gfx.me.num_queue_per_pipe;
94 *pipe = (bit / adev->gfx.me.num_queue_per_pipe)
95 % adev->gfx.me.num_pipe_per_me;
96 *me = (bit / adev->gfx.me.num_queue_per_pipe)
97 / adev->gfx.me.num_pipe_per_me;
98 }
99
amdgpu_gfx_is_me_queue_enabled(struct amdgpu_device * adev,int me,int pipe,int queue)100 bool amdgpu_gfx_is_me_queue_enabled(struct amdgpu_device *adev,
101 int me, int pipe, int queue)
102 {
103 return test_bit(amdgpu_gfx_me_queue_to_bit(adev, me, pipe, queue),
104 adev->gfx.me.queue_bitmap);
105 }
106
107 /**
108 * amdgpu_gfx_parse_disable_cu - Parse the disable_cu module parameter
109 *
110 * @mask: array in which the per-shader array disable masks will be stored
111 * @max_se: number of SEs
112 * @max_sh: number of SHs
113 *
114 * The bitmask of CUs to be disabled in the shader array determined by se and
115 * sh is stored in mask[se * max_sh + sh].
116 */
amdgpu_gfx_parse_disable_cu(unsigned int * mask,unsigned int max_se,unsigned int max_sh)117 void amdgpu_gfx_parse_disable_cu(unsigned int *mask, unsigned int max_se, unsigned int max_sh)
118 {
119 unsigned int se, sh, cu;
120 const char *p;
121
122 memset(mask, 0, sizeof(*mask) * max_se * max_sh);
123
124 if (!amdgpu_disable_cu || !*amdgpu_disable_cu)
125 return;
126
127 #ifdef notyet
128 p = amdgpu_disable_cu;
129 for (;;) {
130 char *next;
131 int ret = sscanf(p, "%u.%u.%u", &se, &sh, &cu);
132
133 if (ret < 3) {
134 DRM_ERROR("amdgpu: could not parse disable_cu\n");
135 return;
136 }
137
138 if (se < max_se && sh < max_sh && cu < 16) {
139 DRM_INFO("amdgpu: disabling CU %u.%u.%u\n", se, sh, cu);
140 mask[se * max_sh + sh] |= 1u << cu;
141 } else {
142 DRM_ERROR("amdgpu: disable_cu %u.%u.%u is out of range\n",
143 se, sh, cu);
144 }
145
146 next = strchr(p, ',');
147 if (!next)
148 break;
149 p = next + 1;
150 }
151 #endif
152 }
153
amdgpu_gfx_is_graphics_multipipe_capable(struct amdgpu_device * adev)154 static bool amdgpu_gfx_is_graphics_multipipe_capable(struct amdgpu_device *adev)
155 {
156 return amdgpu_async_gfx_ring && adev->gfx.me.num_pipe_per_me > 1;
157 }
158
amdgpu_gfx_is_compute_multipipe_capable(struct amdgpu_device * adev)159 static bool amdgpu_gfx_is_compute_multipipe_capable(struct amdgpu_device *adev)
160 {
161 if (amdgpu_compute_multipipe != -1) {
162 DRM_INFO("amdgpu: forcing compute pipe policy %d\n",
163 amdgpu_compute_multipipe);
164 return amdgpu_compute_multipipe == 1;
165 }
166
167 if (amdgpu_ip_version(adev, GC_HWIP, 0) > IP_VERSION(9, 0, 0))
168 return true;
169
170 /* FIXME: spreading the queues across pipes causes perf regressions
171 * on POLARIS11 compute workloads */
172 if (adev->asic_type == CHIP_POLARIS11)
173 return false;
174
175 return adev->gfx.mec.num_mec > 1;
176 }
177
amdgpu_gfx_is_high_priority_graphics_queue(struct amdgpu_device * adev,struct amdgpu_ring * ring)178 bool amdgpu_gfx_is_high_priority_graphics_queue(struct amdgpu_device *adev,
179 struct amdgpu_ring *ring)
180 {
181 int queue = ring->queue;
182 int pipe = ring->pipe;
183
184 /* Policy: use pipe1 queue0 as high priority graphics queue if we
185 * have more than one gfx pipe.
186 */
187 if (amdgpu_gfx_is_graphics_multipipe_capable(adev) &&
188 adev->gfx.num_gfx_rings > 1 && pipe == 1 && queue == 0) {
189 int me = ring->me;
190 int bit;
191
192 bit = amdgpu_gfx_me_queue_to_bit(adev, me, pipe, queue);
193 if (ring == &adev->gfx.gfx_ring[bit])
194 return true;
195 }
196
197 return false;
198 }
199
amdgpu_gfx_is_high_priority_compute_queue(struct amdgpu_device * adev,struct amdgpu_ring * ring)200 bool amdgpu_gfx_is_high_priority_compute_queue(struct amdgpu_device *adev,
201 struct amdgpu_ring *ring)
202 {
203 /* Policy: use 1st queue as high priority compute queue if we
204 * have more than one compute queue.
205 */
206 if (adev->gfx.num_compute_rings > 1 &&
207 ring == &adev->gfx.compute_ring[0])
208 return true;
209
210 return false;
211 }
212
amdgpu_gfx_compute_queue_acquire(struct amdgpu_device * adev)213 void amdgpu_gfx_compute_queue_acquire(struct amdgpu_device *adev)
214 {
215 int i, j, queue, pipe;
216 bool multipipe_policy = amdgpu_gfx_is_compute_multipipe_capable(adev);
217 int max_queues_per_mec = min(adev->gfx.mec.num_pipe_per_mec *
218 adev->gfx.mec.num_queue_per_pipe,
219 adev->gfx.num_compute_rings);
220 int num_xcc = adev->gfx.xcc_mask ? NUM_XCC(adev->gfx.xcc_mask) : 1;
221
222 if (multipipe_policy) {
223 /* policy: make queues evenly cross all pipes on MEC1 only
224 * for multiple xcc, just use the original policy for simplicity */
225 for (j = 0; j < num_xcc; j++) {
226 for (i = 0; i < max_queues_per_mec; i++) {
227 pipe = i % adev->gfx.mec.num_pipe_per_mec;
228 queue = (i / adev->gfx.mec.num_pipe_per_mec) %
229 adev->gfx.mec.num_queue_per_pipe;
230
231 set_bit(pipe * adev->gfx.mec.num_queue_per_pipe + queue,
232 adev->gfx.mec_bitmap[j].queue_bitmap);
233 }
234 }
235 } else {
236 /* policy: amdgpu owns all queues in the given pipe */
237 for (j = 0; j < num_xcc; j++) {
238 for (i = 0; i < max_queues_per_mec; ++i)
239 set_bit(i, adev->gfx.mec_bitmap[j].queue_bitmap);
240 }
241 }
242
243 for (j = 0; j < num_xcc; j++) {
244 dev_dbg(adev->dev, "mec queue bitmap weight=%d\n",
245 bitmap_weight(adev->gfx.mec_bitmap[j].queue_bitmap, AMDGPU_MAX_COMPUTE_QUEUES));
246 }
247 }
248
amdgpu_gfx_graphics_queue_acquire(struct amdgpu_device * adev)249 void amdgpu_gfx_graphics_queue_acquire(struct amdgpu_device *adev)
250 {
251 int i, queue, pipe;
252 bool multipipe_policy = amdgpu_gfx_is_graphics_multipipe_capable(adev);
253 int max_queues_per_me = adev->gfx.me.num_pipe_per_me *
254 adev->gfx.me.num_queue_per_pipe;
255
256 if (multipipe_policy) {
257 /* policy: amdgpu owns the first queue per pipe at this stage
258 * will extend to mulitple queues per pipe later */
259 for (i = 0; i < max_queues_per_me; i++) {
260 pipe = i % adev->gfx.me.num_pipe_per_me;
261 queue = (i / adev->gfx.me.num_pipe_per_me) %
262 adev->gfx.me.num_queue_per_pipe;
263
264 set_bit(pipe * adev->gfx.me.num_queue_per_pipe + queue,
265 adev->gfx.me.queue_bitmap);
266 }
267 } else {
268 for (i = 0; i < max_queues_per_me; ++i)
269 set_bit(i, adev->gfx.me.queue_bitmap);
270 }
271
272 /* update the number of active graphics rings */
273 adev->gfx.num_gfx_rings =
274 bitmap_weight(adev->gfx.me.queue_bitmap, AMDGPU_MAX_GFX_QUEUES);
275 }
276
amdgpu_gfx_kiq_acquire(struct amdgpu_device * adev,struct amdgpu_ring * ring,int xcc_id)277 static int amdgpu_gfx_kiq_acquire(struct amdgpu_device *adev,
278 struct amdgpu_ring *ring, int xcc_id)
279 {
280 int queue_bit;
281 int mec, pipe, queue;
282
283 queue_bit = adev->gfx.mec.num_mec
284 * adev->gfx.mec.num_pipe_per_mec
285 * adev->gfx.mec.num_queue_per_pipe;
286
287 while (--queue_bit >= 0) {
288 if (test_bit(queue_bit, adev->gfx.mec_bitmap[xcc_id].queue_bitmap))
289 continue;
290
291 amdgpu_queue_mask_bit_to_mec_queue(adev, queue_bit, &mec, &pipe, &queue);
292
293 /*
294 * 1. Using pipes 2/3 from MEC 2 seems cause problems.
295 * 2. It must use queue id 0, because CGPG_IDLE/SAVE/LOAD/RUN
296 * only can be issued on queue 0.
297 */
298 if ((mec == 1 && pipe > 1) || queue != 0)
299 continue;
300
301 ring->me = mec + 1;
302 ring->pipe = pipe;
303 ring->queue = queue;
304
305 return 0;
306 }
307
308 dev_err(adev->dev, "Failed to find a queue for KIQ\n");
309 return -EINVAL;
310 }
311
/*
 * Initialise the KIQ ring of XCC @xcc_id.
 *
 * Sets up the ring lock and the ring's doorbell/hub fields, picks a queue
 * with amdgpu_gfx_kiq_acquire(), names the ring after its
 * xcc/me/pipe/queue and finally calls amdgpu_ring_init().
 *
 * Returns 0 on success, or the error returned by amdgpu_gfx_kiq_acquire()
 * or amdgpu_ring_init().
 */
int amdgpu_gfx_kiq_init_ring(struct amdgpu_device *adev, int xcc_id)
{
	struct amdgpu_kiq *kiq = &adev->gfx.kiq[xcc_id];
	struct amdgpu_irq_src *irq = &kiq->irq;
	struct amdgpu_ring *ring = &kiq->ring;
	int r = 0;

	/*
	 * NOTE(review): the lock is set up with mtx_init()/IPL_TTY while the
	 * users in this file take it with spin_lock() - presumably a
	 * compatibility mapping of the port; confirm.
	 */
	mtx_init(&kiq->ring_lock, IPL_TTY);

	/* NOTE(review): adev is cleared here, presumably so that
	 * amdgpu_ring_init() treats the ring as uninitialised - confirm. */
	ring->adev = NULL;
	ring->ring_obj = NULL;
	ring->use_doorbell = true;
	ring->xcc_id = xcc_id;
	ring->vm_hub = AMDGPU_GFXHUB(xcc_id);
	/* KIQ doorbell of this XCC, shifted left by one */
	ring->doorbell_index =
		(adev->doorbell_index.kiq +
		 xcc_id * adev->doorbell_index.xcc_doorbell_range)
		<< 1;

	/* fills in ring->me, ring->pipe and ring->queue */
	r = amdgpu_gfx_kiq_acquire(adev, ring, xcc_id);
	if (r)
		return r;

	ring->eop_gpu_addr = kiq->eop_gpu_addr;
	ring->no_scheduler = true;
	snprintf(ring->name, sizeof(ring->name), "kiq_%hhu.%hhu.%hhu.%hhu",
		 (unsigned char)xcc_id, (unsigned char)ring->me,
		 (unsigned char)ring->pipe, (unsigned char)ring->queue);
	r = amdgpu_ring_init(adev, ring, 1024, irq, AMDGPU_CP_KIQ_IRQ_DRIVER0,
			     AMDGPU_RING_PRIO_DEFAULT, NULL);
	if (r)
		dev_warn(adev->dev, "(%d) failed to init kiq ring\n", r);

	return r;
}
347
/* Tear down a KIQ ring; this is a plain wrapper around amdgpu_ring_fini(). */
void amdgpu_gfx_kiq_free_ring(struct amdgpu_ring *ring)
{
	amdgpu_ring_fini(ring);
}
352
amdgpu_gfx_kiq_fini(struct amdgpu_device * adev,int xcc_id)353 void amdgpu_gfx_kiq_fini(struct amdgpu_device *adev, int xcc_id)
354 {
355 struct amdgpu_kiq *kiq = &adev->gfx.kiq[xcc_id];
356
357 amdgpu_bo_free_kernel(&kiq->eop_obj, &kiq->eop_gpu_addr, NULL);
358 }
359
amdgpu_gfx_kiq_init(struct amdgpu_device * adev,unsigned int hpd_size,int xcc_id)360 int amdgpu_gfx_kiq_init(struct amdgpu_device *adev,
361 unsigned int hpd_size, int xcc_id)
362 {
363 int r;
364 u32 *hpd;
365 struct amdgpu_kiq *kiq = &adev->gfx.kiq[xcc_id];
366
367 r = amdgpu_bo_create_kernel(adev, hpd_size, PAGE_SIZE,
368 AMDGPU_GEM_DOMAIN_GTT, &kiq->eop_obj,
369 &kiq->eop_gpu_addr, (void **)&hpd);
370 if (r) {
371 dev_warn(adev->dev, "failed to create KIQ bo (%d).\n", r);
372 return r;
373 }
374
375 memset(hpd, 0, hpd_size);
376
377 r = amdgpu_bo_reserve(kiq->eop_obj, true);
378 if (unlikely(r != 0))
379 dev_warn(adev->dev, "(%d) reserve kiq eop bo failed\n", r);
380 amdgpu_bo_kunmap(kiq->eop_obj);
381 amdgpu_bo_unreserve(kiq->eop_obj);
382
383 return 0;
384 }
385
386 /* create MQD for each compute/gfx queue */
amdgpu_gfx_mqd_sw_init(struct amdgpu_device * adev,unsigned int mqd_size,int xcc_id)387 int amdgpu_gfx_mqd_sw_init(struct amdgpu_device *adev,
388 unsigned int mqd_size, int xcc_id)
389 {
390 int r, i, j;
391 struct amdgpu_kiq *kiq = &adev->gfx.kiq[xcc_id];
392 struct amdgpu_ring *ring = &kiq->ring;
393 u32 domain = AMDGPU_GEM_DOMAIN_GTT;
394
395 #if !defined(CONFIG_ARM) && !defined(CONFIG_ARM64)
396 /* Only enable on gfx10 and 11 for now to avoid changing behavior on older chips */
397 if (amdgpu_ip_version(adev, GC_HWIP, 0) >= IP_VERSION(10, 0, 0))
398 domain |= AMDGPU_GEM_DOMAIN_VRAM;
399 #endif
400
401 /* create MQD for KIQ */
402 if (!adev->enable_mes_kiq && !ring->mqd_obj) {
403 /* originaly the KIQ MQD is put in GTT domain, but for SRIOV VRAM domain is a must
404 * otherwise hypervisor trigger SAVE_VF fail after driver unloaded which mean MQD
405 * deallocated and gart_unbind, to strict diverage we decide to use VRAM domain for
406 * KIQ MQD no matter SRIOV or Bare-metal
407 */
408 r = amdgpu_bo_create_kernel(adev, mqd_size, PAGE_SIZE,
409 AMDGPU_GEM_DOMAIN_VRAM |
410 AMDGPU_GEM_DOMAIN_GTT,
411 &ring->mqd_obj,
412 &ring->mqd_gpu_addr,
413 &ring->mqd_ptr);
414 if (r) {
415 dev_warn(adev->dev, "failed to create ring mqd ob (%d)", r);
416 return r;
417 }
418
419 /* prepare MQD backup */
420 kiq->mqd_backup = kmalloc(mqd_size, GFP_KERNEL);
421 if (!kiq->mqd_backup) {
422 dev_warn(adev->dev,
423 "no memory to create MQD backup for ring %s\n", ring->name);
424 return -ENOMEM;
425 }
426 }
427
428 if (adev->asic_type >= CHIP_NAVI10 && amdgpu_async_gfx_ring) {
429 /* create MQD for each KGQ */
430 for (i = 0; i < adev->gfx.num_gfx_rings; i++) {
431 ring = &adev->gfx.gfx_ring[i];
432 if (!ring->mqd_obj) {
433 r = amdgpu_bo_create_kernel(adev, mqd_size, PAGE_SIZE,
434 domain, &ring->mqd_obj,
435 &ring->mqd_gpu_addr, &ring->mqd_ptr);
436 if (r) {
437 dev_warn(adev->dev, "failed to create ring mqd bo (%d)", r);
438 return r;
439 }
440
441 ring->mqd_size = mqd_size;
442 /* prepare MQD backup */
443 adev->gfx.me.mqd_backup[i] = kmalloc(mqd_size, GFP_KERNEL);
444 if (!adev->gfx.me.mqd_backup[i]) {
445 dev_warn(adev->dev, "no memory to create MQD backup for ring %s\n", ring->name);
446 return -ENOMEM;
447 }
448 }
449 }
450 }
451
452 /* create MQD for each KCQ */
453 for (i = 0; i < adev->gfx.num_compute_rings; i++) {
454 j = i + xcc_id * adev->gfx.num_compute_rings;
455 ring = &adev->gfx.compute_ring[j];
456 if (!ring->mqd_obj) {
457 r = amdgpu_bo_create_kernel(adev, mqd_size, PAGE_SIZE,
458 domain, &ring->mqd_obj,
459 &ring->mqd_gpu_addr, &ring->mqd_ptr);
460 if (r) {
461 dev_warn(adev->dev, "failed to create ring mqd bo (%d)", r);
462 return r;
463 }
464
465 ring->mqd_size = mqd_size;
466 /* prepare MQD backup */
467 adev->gfx.mec.mqd_backup[j] = kmalloc(mqd_size, GFP_KERNEL);
468 if (!adev->gfx.mec.mqd_backup[j]) {
469 dev_warn(adev->dev, "no memory to create MQD backup for ring %s\n", ring->name);
470 return -ENOMEM;
471 }
472 }
473 }
474
475 return 0;
476 }
477
amdgpu_gfx_mqd_sw_fini(struct amdgpu_device * adev,int xcc_id)478 void amdgpu_gfx_mqd_sw_fini(struct amdgpu_device *adev, int xcc_id)
479 {
480 struct amdgpu_ring *ring = NULL;
481 int i, j;
482 struct amdgpu_kiq *kiq = &adev->gfx.kiq[xcc_id];
483
484 if (adev->asic_type >= CHIP_NAVI10 && amdgpu_async_gfx_ring) {
485 for (i = 0; i < adev->gfx.num_gfx_rings; i++) {
486 ring = &adev->gfx.gfx_ring[i];
487 kfree(adev->gfx.me.mqd_backup[i]);
488 amdgpu_bo_free_kernel(&ring->mqd_obj,
489 &ring->mqd_gpu_addr,
490 &ring->mqd_ptr);
491 }
492 }
493
494 for (i = 0; i < adev->gfx.num_compute_rings; i++) {
495 j = i + xcc_id * adev->gfx.num_compute_rings;
496 ring = &adev->gfx.compute_ring[j];
497 kfree(adev->gfx.mec.mqd_backup[j]);
498 amdgpu_bo_free_kernel(&ring->mqd_obj,
499 &ring->mqd_gpu_addr,
500 &ring->mqd_ptr);
501 }
502
503 ring = &kiq->ring;
504 kfree(kiq->mqd_backup);
505 amdgpu_bo_free_kernel(&ring->mqd_obj,
506 &ring->mqd_gpu_addr,
507 &ring->mqd_ptr);
508 }
509
amdgpu_gfx_disable_kcq(struct amdgpu_device * adev,int xcc_id)510 int amdgpu_gfx_disable_kcq(struct amdgpu_device *adev, int xcc_id)
511 {
512 struct amdgpu_kiq *kiq = &adev->gfx.kiq[xcc_id];
513 struct amdgpu_ring *kiq_ring = &kiq->ring;
514 int i, r = 0;
515 int j;
516
517 if (adev->enable_mes) {
518 for (i = 0; i < adev->gfx.num_compute_rings; i++) {
519 j = i + xcc_id * adev->gfx.num_compute_rings;
520 amdgpu_mes_unmap_legacy_queue(adev,
521 &adev->gfx.compute_ring[j],
522 RESET_QUEUES, 0, 0);
523 }
524 return 0;
525 }
526
527 if (!kiq->pmf || !kiq->pmf->kiq_unmap_queues)
528 return -EINVAL;
529
530 if (!kiq_ring->sched.ready || adev->job_hang)
531 return 0;
532 /**
533 * This is workaround: only skip kiq_ring test
534 * during ras recovery in suspend stage for gfx9.4.3
535 */
536 if ((amdgpu_ip_version(adev, GC_HWIP, 0) == IP_VERSION(9, 4, 3) ||
537 amdgpu_ip_version(adev, GC_HWIP, 0) == IP_VERSION(9, 4, 4)) &&
538 amdgpu_ras_in_recovery(adev))
539 return 0;
540
541 spin_lock(&kiq->ring_lock);
542 if (amdgpu_ring_alloc(kiq_ring, kiq->pmf->unmap_queues_size *
543 adev->gfx.num_compute_rings)) {
544 spin_unlock(&kiq->ring_lock);
545 return -ENOMEM;
546 }
547
548 for (i = 0; i < adev->gfx.num_compute_rings; i++) {
549 j = i + xcc_id * adev->gfx.num_compute_rings;
550 kiq->pmf->kiq_unmap_queues(kiq_ring,
551 &adev->gfx.compute_ring[j],
552 RESET_QUEUES, 0, 0);
553 }
554 /* Submit unmap queue packet */
555 amdgpu_ring_commit(kiq_ring);
556 /*
557 * Ring test will do a basic scratch register change check. Just run
558 * this to ensure that unmap queues that is submitted before got
559 * processed successfully before returning.
560 */
561 r = amdgpu_ring_test_helper(kiq_ring);
562
563 spin_unlock(&kiq->ring_lock);
564
565 return r;
566 }
567
amdgpu_gfx_disable_kgq(struct amdgpu_device * adev,int xcc_id)568 int amdgpu_gfx_disable_kgq(struct amdgpu_device *adev, int xcc_id)
569 {
570 struct amdgpu_kiq *kiq = &adev->gfx.kiq[xcc_id];
571 struct amdgpu_ring *kiq_ring = &kiq->ring;
572 int i, r = 0;
573 int j;
574
575 if (adev->enable_mes) {
576 if (amdgpu_gfx_is_master_xcc(adev, xcc_id)) {
577 for (i = 0; i < adev->gfx.num_gfx_rings; i++) {
578 j = i + xcc_id * adev->gfx.num_gfx_rings;
579 amdgpu_mes_unmap_legacy_queue(adev,
580 &adev->gfx.gfx_ring[j],
581 PREEMPT_QUEUES, 0, 0);
582 }
583 }
584 return 0;
585 }
586
587 if (!kiq->pmf || !kiq->pmf->kiq_unmap_queues)
588 return -EINVAL;
589
590 if (!adev->gfx.kiq[0].ring.sched.ready || adev->job_hang)
591 return 0;
592
593 if (amdgpu_gfx_is_master_xcc(adev, xcc_id)) {
594 spin_lock(&kiq->ring_lock);
595 if (amdgpu_ring_alloc(kiq_ring, kiq->pmf->unmap_queues_size *
596 adev->gfx.num_gfx_rings)) {
597 spin_unlock(&kiq->ring_lock);
598 return -ENOMEM;
599 }
600
601 for (i = 0; i < adev->gfx.num_gfx_rings; i++) {
602 j = i + xcc_id * adev->gfx.num_gfx_rings;
603 kiq->pmf->kiq_unmap_queues(kiq_ring,
604 &adev->gfx.gfx_ring[j],
605 PREEMPT_QUEUES, 0, 0);
606 }
607 /* Submit unmap queue packet */
608 amdgpu_ring_commit(kiq_ring);
609
610 /*
611 * Ring test will do a basic scratch register change check.
612 * Just run this to ensure that unmap queues that is submitted
613 * before got processed successfully before returning.
614 */
615 r = amdgpu_ring_test_helper(kiq_ring);
616 spin_unlock(&kiq->ring_lock);
617 }
618
619 return r;
620 }
621
/*
 * Translate a driver queue bitmap index into the bit position used in the
 * set-resources queue mask, which is laid out with a fixed 4 pipes per MEC
 * and 8 queues per pipe.
 */
int amdgpu_queue_mask_bit_to_set_resource_bit(struct amdgpu_device *adev,
					      int queue_bit)
{
	int mec, pipe, queue;

	amdgpu_queue_mask_bit_to_mec_queue(adev, queue_bit, &mec, &pipe, &queue);

	return mec * 4 * 8 + pipe * 8 + queue;
}
634
amdgpu_gfx_mes_enable_kcq(struct amdgpu_device * adev,int xcc_id)635 static int amdgpu_gfx_mes_enable_kcq(struct amdgpu_device *adev, int xcc_id)
636 {
637 struct amdgpu_kiq *kiq = &adev->gfx.kiq[xcc_id];
638 struct amdgpu_ring *kiq_ring = &kiq->ring;
639 uint64_t queue_mask = ~0ULL;
640 int r, i, j;
641
642 amdgpu_device_flush_hdp(adev, NULL);
643
644 if (!adev->enable_uni_mes) {
645 spin_lock(&kiq->ring_lock);
646 r = amdgpu_ring_alloc(kiq_ring, kiq->pmf->set_resources_size);
647 if (r) {
648 dev_err(adev->dev, "Failed to lock KIQ (%d).\n", r);
649 spin_unlock(&kiq->ring_lock);
650 return r;
651 }
652
653 kiq->pmf->kiq_set_resources(kiq_ring, queue_mask);
654 r = amdgpu_ring_test_helper(kiq_ring);
655 spin_unlock(&kiq->ring_lock);
656 if (r)
657 dev_err(adev->dev, "KIQ failed to set resources\n");
658 }
659
660 for (i = 0; i < adev->gfx.num_compute_rings; i++) {
661 j = i + xcc_id * adev->gfx.num_compute_rings;
662 r = amdgpu_mes_map_legacy_queue(adev,
663 &adev->gfx.compute_ring[j]);
664 if (r) {
665 dev_err(adev->dev, "failed to map compute queue\n");
666 return r;
667 }
668 }
669
670 return 0;
671 }
672
amdgpu_gfx_enable_kcq(struct amdgpu_device * adev,int xcc_id)673 int amdgpu_gfx_enable_kcq(struct amdgpu_device *adev, int xcc_id)
674 {
675 struct amdgpu_kiq *kiq = &adev->gfx.kiq[xcc_id];
676 struct amdgpu_ring *kiq_ring = &kiq->ring;
677 uint64_t queue_mask = 0;
678 int r, i, j;
679
680 if (adev->mes.enable_legacy_queue_map)
681 return amdgpu_gfx_mes_enable_kcq(adev, xcc_id);
682
683 if (!kiq->pmf || !kiq->pmf->kiq_map_queues || !kiq->pmf->kiq_set_resources)
684 return -EINVAL;
685
686 for (i = 0; i < AMDGPU_MAX_COMPUTE_QUEUES; ++i) {
687 if (!test_bit(i, adev->gfx.mec_bitmap[xcc_id].queue_bitmap))
688 continue;
689
690 /* This situation may be hit in the future if a new HW
691 * generation exposes more than 64 queues. If so, the
692 * definition of queue_mask needs updating */
693 if (WARN_ON(i > (sizeof(queue_mask)*8))) {
694 DRM_ERROR("Invalid KCQ enabled: %d\n", i);
695 break;
696 }
697
698 queue_mask |= (1ull << amdgpu_queue_mask_bit_to_set_resource_bit(adev, i));
699 }
700
701 amdgpu_device_flush_hdp(adev, NULL);
702
703 DRM_INFO("kiq ring mec %d pipe %d q %d\n", kiq_ring->me, kiq_ring->pipe,
704 kiq_ring->queue);
705
706 spin_lock(&kiq->ring_lock);
707 r = amdgpu_ring_alloc(kiq_ring, kiq->pmf->map_queues_size *
708 adev->gfx.num_compute_rings +
709 kiq->pmf->set_resources_size);
710 if (r) {
711 DRM_ERROR("Failed to lock KIQ (%d).\n", r);
712 spin_unlock(&kiq->ring_lock);
713 return r;
714 }
715
716 kiq->pmf->kiq_set_resources(kiq_ring, queue_mask);
717 for (i = 0; i < adev->gfx.num_compute_rings; i++) {
718 j = i + xcc_id * adev->gfx.num_compute_rings;
719 kiq->pmf->kiq_map_queues(kiq_ring,
720 &adev->gfx.compute_ring[j]);
721 }
722 /* Submit map queue packet */
723 amdgpu_ring_commit(kiq_ring);
724 /*
725 * Ring test will do a basic scratch register change check. Just run
726 * this to ensure that map queues that is submitted before got
727 * processed successfully before returning.
728 */
729 r = amdgpu_ring_test_helper(kiq_ring);
730 spin_unlock(&kiq->ring_lock);
731 if (r)
732 DRM_ERROR("KCQ enable failed\n");
733
734 return r;
735 }
736
/*
 * Map the kernel gfx queues of XCC @xcc_id.
 *
 * With legacy queue mapping through MES enabled, each gfx ring is mapped
 * with amdgpu_mes_map_legacy_queue(). Otherwise map-queues packets are
 * written to the KIQ ring (only when amdgpu_gfx_is_master_xcc() accepts
 * @xcc_id), the ring is committed and a ring test is run.
 *
 * Returns -EINVAL when the KIQ has no map hook, the first mapping or ring
 * allocation error, or the result of the ring test.
 */
int amdgpu_gfx_enable_kgq(struct amdgpu_device *adev, int xcc_id)
{
	struct amdgpu_kiq *kiq = &adev->gfx.kiq[xcc_id];
	struct amdgpu_ring *kiq_ring = &kiq->ring;
	int r, i, j;

	/* the hook is required even on the MES path below */
	if (!kiq->pmf || !kiq->pmf->kiq_map_queues)
		return -EINVAL;

	amdgpu_device_flush_hdp(adev, NULL);

	if (adev->mes.enable_legacy_queue_map) {
		for (i = 0; i < adev->gfx.num_gfx_rings; i++) {
			j = i + xcc_id * adev->gfx.num_gfx_rings;
			r = amdgpu_mes_map_legacy_queue(adev,
							&adev->gfx.gfx_ring[j]);
			if (r) {
				DRM_ERROR("failed to map gfx queue\n");
				return r;
			}
		}

		return 0;
	}

	spin_lock(&kiq->ring_lock);
	/* No need to map the gfx queues on a non-master XCC */
	if (amdgpu_gfx_is_master_xcc(adev, xcc_id)) {
		r = amdgpu_ring_alloc(kiq_ring, kiq->pmf->map_queues_size *
						adev->gfx.num_gfx_rings);
		if (r) {
			DRM_ERROR("Failed to lock KIQ (%d).\n", r);
			spin_unlock(&kiq->ring_lock);
			return r;
		}

		for (i = 0; i < adev->gfx.num_gfx_rings; i++) {
			j = i + xcc_id * adev->gfx.num_gfx_rings;
			kiq->pmf->kiq_map_queues(kiq_ring,
						 &adev->gfx.gfx_ring[j]);
		}
	}
	/*
	 * Submit map queue packet.
	 * NOTE(review): on a non-master XCC the commit runs without a
	 * preceding amdgpu_ring_alloc() - confirm this is intended.
	 */
	amdgpu_ring_commit(kiq_ring);
	/*
	 * Ring test will do a basic scratch register change check. Just run
	 * this to ensure that map queues that is submitted before got
	 * processed successfully before returning.
	 */
	r = amdgpu_ring_test_helper(kiq_ring);
	spin_unlock(&kiq->ring_lock);
	if (r)
		DRM_ERROR("KGQ enable failed\n");

	return r;
}
793
794 /* amdgpu_gfx_off_ctrl - Handle gfx off feature enable/disable
795 *
796 * @adev: amdgpu_device pointer
797 * @bool enable true: enable gfx off feature, false: disable gfx off feature
798 *
799 * 1. gfx off feature will be enabled by gfx ip after gfx cg gp enabled.
800 * 2. other client can send request to disable gfx off feature, the request should be honored.
801 * 3. other client can cancel their request of disable gfx off feature
802 * 4. other client should not send request to enable gfx off feature before disable gfx off feature.
803 */
804
amdgpu_gfx_off_ctrl(struct amdgpu_device * adev,bool enable)805 void amdgpu_gfx_off_ctrl(struct amdgpu_device *adev, bool enable)
806 {
807 unsigned long delay = GFX_OFF_DELAY_ENABLE;
808
809 if (!(adev->pm.pp_feature & PP_GFXOFF_MASK))
810 return;
811
812 mutex_lock(&adev->gfx.gfx_off_mutex);
813
814 if (enable) {
815 /* If the count is already 0, it means there's an imbalance bug somewhere.
816 * Note that the bug may be in a different caller than the one which triggers the
817 * WARN_ON_ONCE.
818 */
819 if (WARN_ON_ONCE(adev->gfx.gfx_off_req_count == 0))
820 goto unlock;
821
822 adev->gfx.gfx_off_req_count--;
823
824 if (adev->gfx.gfx_off_req_count == 0 &&
825 !adev->gfx.gfx_off_state) {
826 /* If going to s2idle, no need to wait */
827 if (adev->in_s0ix) {
828 if (!amdgpu_dpm_set_powergating_by_smu(adev,
829 AMD_IP_BLOCK_TYPE_GFX, true))
830 adev->gfx.gfx_off_state = true;
831 } else {
832 schedule_delayed_work(&adev->gfx.gfx_off_delay_work,
833 delay);
834 }
835 }
836 } else {
837 if (adev->gfx.gfx_off_req_count == 0) {
838 cancel_delayed_work_sync(&adev->gfx.gfx_off_delay_work);
839
840 if (adev->gfx.gfx_off_state &&
841 !amdgpu_dpm_set_powergating_by_smu(adev, AMD_IP_BLOCK_TYPE_GFX, false)) {
842 adev->gfx.gfx_off_state = false;
843
844 if (adev->gfx.funcs->init_spm_golden) {
845 dev_dbg(adev->dev,
846 "GFXOFF is disabled, re-init SPM golden settings\n");
847 amdgpu_gfx_init_spm_golden(adev);
848 }
849 }
850 }
851
852 adev->gfx.gfx_off_req_count++;
853 }
854
855 unlock:
856 mutex_unlock(&adev->gfx.gfx_off_mutex);
857 }
858
/*
 * Forward @value to amdgpu_dpm_set_residency_gfxoff() while holding
 * gfx_off_mutex and return its result.
 */
int amdgpu_set_gfx_off_residency(struct amdgpu_device *adev, bool value)
{
	int ret;

	mutex_lock(&adev->gfx.gfx_off_mutex);
	ret = amdgpu_dpm_set_residency_gfxoff(adev, value);
	mutex_unlock(&adev->gfx.gfx_off_mutex);

	return ret;
}
871
/*
 * Query the gfx off residency via amdgpu_dpm_get_residency_gfxoff() while
 * holding gfx_off_mutex; the value is stored through @value and the dpm
 * call's result is returned.
 */
int amdgpu_get_gfx_off_residency(struct amdgpu_device *adev, u32 *value)
{
	int ret;

	mutex_lock(&adev->gfx.gfx_off_mutex);
	ret = amdgpu_dpm_get_residency_gfxoff(adev, value);
	mutex_unlock(&adev->gfx.gfx_off_mutex);

	return ret;
}
884
/*
 * Query the gfx off entry count via amdgpu_dpm_get_entrycount_gfxoff()
 * while holding gfx_off_mutex; the value is stored through @value and the
 * dpm call's result is returned.
 */
int amdgpu_get_gfx_off_entrycount(struct amdgpu_device *adev, u64 *value)
{
	int ret;

	mutex_lock(&adev->gfx.gfx_off_mutex);
	ret = amdgpu_dpm_get_entrycount_gfxoff(adev, value);
	mutex_unlock(&adev->gfx.gfx_off_mutex);

	return ret;
}
897
/*
 * Query the gfx off status via amdgpu_dpm_get_status_gfxoff() while
 * holding gfx_off_mutex; the value is stored through @value and the dpm
 * call's result is returned.
 */
int amdgpu_get_gfx_off_status(struct amdgpu_device *adev, uint32_t *value)
{
	int ret;

	mutex_lock(&adev->gfx.gfx_off_mutex);
	ret = amdgpu_dpm_get_status_gfxoff(adev, value);
	mutex_unlock(&adev->gfx.gfx_off_mutex);

	return ret;
}
911
amdgpu_gfx_ras_late_init(struct amdgpu_device * adev,struct ras_common_if * ras_block)912 int amdgpu_gfx_ras_late_init(struct amdgpu_device *adev, struct ras_common_if *ras_block)
913 {
914 int r;
915
916 if (amdgpu_ras_is_supported(adev, ras_block->block)) {
917 if (!amdgpu_persistent_edc_harvesting_supported(adev)) {
918 r = amdgpu_ras_reset_error_status(adev, AMDGPU_RAS_BLOCK__GFX);
919 if (r)
920 return r;
921 }
922
923 r = amdgpu_ras_block_late_init(adev, ras_block);
924 if (r)
925 return r;
926
927 if (adev->gfx.cp_ecc_error_irq.funcs) {
928 r = amdgpu_irq_get(adev, &adev->gfx.cp_ecc_error_irq, 0);
929 if (r)
930 goto late_fini;
931 }
932 } else {
933 amdgpu_ras_feature_enable_on_boot(adev, ras_block, 0);
934 }
935
936 return 0;
937 late_fini:
938 amdgpu_ras_block_late_fini(adev, ras_block);
939 return r;
940 }
941
amdgpu_gfx_ras_sw_init(struct amdgpu_device * adev)942 int amdgpu_gfx_ras_sw_init(struct amdgpu_device *adev)
943 {
944 int err = 0;
945 struct amdgpu_gfx_ras *ras = NULL;
946
947 /* adev->gfx.ras is NULL, which means gfx does not
948 * support ras function, then do nothing here.
949 */
950 if (!adev->gfx.ras)
951 return 0;
952
953 ras = adev->gfx.ras;
954
955 err = amdgpu_ras_register_ras_block(adev, &ras->ras_block);
956 if (err) {
957 dev_err(adev->dev, "Failed to register gfx ras block!\n");
958 return err;
959 }
960
961 strlcpy(ras->ras_block.ras_comm.name, "gfx",
962 sizeof(ras->ras_block.ras_comm.name));
963 ras->ras_block.ras_comm.block = AMDGPU_RAS_BLOCK__GFX;
964 ras->ras_block.ras_comm.type = AMDGPU_RAS_ERROR__MULTI_UNCORRECTABLE;
965 adev->gfx.ras_if = &ras->ras_block.ras_comm;
966
967 /* If not define special ras_late_init function, use gfx default ras_late_init */
968 if (!ras->ras_block.ras_late_init)
969 ras->ras_block.ras_late_init = amdgpu_gfx_ras_late_init;
970
971 /* If not defined special ras_cb function, use default ras_cb */
972 if (!ras->ras_block.ras_cb)
973 ras->ras_block.ras_cb = amdgpu_gfx_process_ras_data_cb;
974
975 return 0;
976 }
977
amdgpu_gfx_poison_consumption_handler(struct amdgpu_device * adev,struct amdgpu_iv_entry * entry)978 int amdgpu_gfx_poison_consumption_handler(struct amdgpu_device *adev,
979 struct amdgpu_iv_entry *entry)
980 {
981 if (adev->gfx.ras && adev->gfx.ras->poison_consumption_handler)
982 return adev->gfx.ras->poison_consumption_handler(adev, entry);
983
984 return 0;
985 }
986
amdgpu_gfx_process_ras_data_cb(struct amdgpu_device * adev,void * err_data,struct amdgpu_iv_entry * entry)987 int amdgpu_gfx_process_ras_data_cb(struct amdgpu_device *adev,
988 void *err_data,
989 struct amdgpu_iv_entry *entry)
990 {
991 /* TODO ue will trigger an interrupt.
992 *
993 * When “Full RAS” is enabled, the per-IP interrupt sources should
994 * be disabled and the driver should only look for the aggregated
995 * interrupt via sync flood
996 */
997 if (!amdgpu_ras_is_supported(adev, AMDGPU_RAS_BLOCK__GFX)) {
998 kgd2kfd_set_sram_ecc_flag(adev->kfd.dev);
999 if (adev->gfx.ras && adev->gfx.ras->ras_block.hw_ops &&
1000 adev->gfx.ras->ras_block.hw_ops->query_ras_error_count)
1001 adev->gfx.ras->ras_block.hw_ops->query_ras_error_count(adev, err_data);
1002 amdgpu_ras_reset_gpu(adev);
1003 }
1004 return AMDGPU_RAS_SUCCESS;
1005 }
1006
amdgpu_gfx_cp_ecc_error_irq(struct amdgpu_device * adev,struct amdgpu_irq_src * source,struct amdgpu_iv_entry * entry)1007 int amdgpu_gfx_cp_ecc_error_irq(struct amdgpu_device *adev,
1008 struct amdgpu_irq_src *source,
1009 struct amdgpu_iv_entry *entry)
1010 {
1011 struct ras_common_if *ras_if = adev->gfx.ras_if;
1012 struct ras_dispatch_if ih_data = {
1013 .entry = entry,
1014 };
1015
1016 if (!ras_if)
1017 return 0;
1018
1019 ih_data.head = *ras_if;
1020
1021 DRM_ERROR("CP ECC ERROR IRQ\n");
1022 amdgpu_ras_interrupt_dispatch(adev, &ih_data);
1023 return 0;
1024 }
1025
amdgpu_gfx_ras_error_func(struct amdgpu_device * adev,void * ras_error_status,void (* func)(struct amdgpu_device * adev,void * ras_error_status,int xcc_id))1026 void amdgpu_gfx_ras_error_func(struct amdgpu_device *adev,
1027 void *ras_error_status,
1028 void (*func)(struct amdgpu_device *adev, void *ras_error_status,
1029 int xcc_id))
1030 {
1031 int i;
1032 int num_xcc = adev->gfx.xcc_mask ? NUM_XCC(adev->gfx.xcc_mask) : 1;
1033 uint32_t xcc_mask = GENMASK(num_xcc - 1, 0);
1034 struct ras_err_data *err_data = (struct ras_err_data *)ras_error_status;
1035
1036 if (err_data) {
1037 err_data->ue_count = 0;
1038 err_data->ce_count = 0;
1039 }
1040
1041 for_each_inst(i, xcc_mask)
1042 func(adev, ras_error_status, i);
1043 }
1044
amdgpu_kiq_rreg(struct amdgpu_device * adev,uint32_t reg,uint32_t xcc_id)1045 uint32_t amdgpu_kiq_rreg(struct amdgpu_device *adev, uint32_t reg, uint32_t xcc_id)
1046 {
1047 signed long r, cnt = 0;
1048 unsigned long flags;
1049 uint32_t seq, reg_val_offs = 0, value = 0;
1050 struct amdgpu_kiq *kiq = &adev->gfx.kiq[xcc_id];
1051 struct amdgpu_ring *ring = &kiq->ring;
1052
1053 if (amdgpu_device_skip_hw_access(adev))
1054 return 0;
1055
1056 if (adev->mes.ring[0].sched.ready)
1057 return amdgpu_mes_rreg(adev, reg);
1058
1059 BUG_ON(!ring->funcs->emit_rreg);
1060
1061 spin_lock_irqsave(&kiq->ring_lock, flags);
1062 if (amdgpu_device_wb_get(adev, ®_val_offs)) {
1063 pr_err("critical bug! too many kiq readers\n");
1064 goto failed_unlock;
1065 }
1066 r = amdgpu_ring_alloc(ring, 32);
1067 if (r)
1068 goto failed_unlock;
1069
1070 amdgpu_ring_emit_rreg(ring, reg, reg_val_offs);
1071 r = amdgpu_fence_emit_polling(ring, &seq, MAX_KIQ_REG_WAIT);
1072 if (r)
1073 goto failed_undo;
1074
1075 amdgpu_ring_commit(ring);
1076 spin_unlock_irqrestore(&kiq->ring_lock, flags);
1077
1078 r = amdgpu_fence_wait_polling(ring, seq, MAX_KIQ_REG_WAIT);
1079
1080 /* don't wait anymore for gpu reset case because this way may
1081 * block gpu_recover() routine forever, e.g. this virt_kiq_rreg
1082 * is triggered in TTM and ttm_bo_lock_delayed_workqueue() will
1083 * never return if we keep waiting in virt_kiq_rreg, which cause
1084 * gpu_recover() hang there.
1085 *
1086 * also don't wait anymore for IRQ context
1087 * */
1088 if (r < 1 && (amdgpu_in_reset(adev) || in_interrupt()))
1089 goto failed_kiq_read;
1090
1091 might_sleep();
1092 while (r < 1 && cnt++ < MAX_KIQ_REG_TRY) {
1093 drm_msleep(MAX_KIQ_REG_BAILOUT_INTERVAL);
1094 r = amdgpu_fence_wait_polling(ring, seq, MAX_KIQ_REG_WAIT);
1095 }
1096
1097 if (cnt > MAX_KIQ_REG_TRY)
1098 goto failed_kiq_read;
1099
1100 mb();
1101 value = adev->wb.wb[reg_val_offs];
1102 amdgpu_device_wb_free(adev, reg_val_offs);
1103 return value;
1104
1105 failed_undo:
1106 amdgpu_ring_undo(ring);
1107 failed_unlock:
1108 spin_unlock_irqrestore(&kiq->ring_lock, flags);
1109 failed_kiq_read:
1110 if (reg_val_offs)
1111 amdgpu_device_wb_free(adev, reg_val_offs);
1112 dev_err(adev->dev, "failed to read reg:%x\n", reg);
1113 return ~0;
1114 }
1115
/**
 * amdgpu_kiq_wreg - write a register by submitting a request on the KIQ ring
 * @adev: amdgpu_device pointer
 * @reg: register to write
 * @v: value to write
 * @xcc_id: index into adev->gfx.kiq[] selecting the KIQ to use
 *
 * Returns silently when hardware access is being skipped, and delegates to
 * amdgpu_mes_wreg() when the first MES ring's scheduler is ready. Failures
 * are reported through dev_err() only; there is no return value.
 */
void amdgpu_kiq_wreg(struct amdgpu_device *adev, uint32_t reg, uint32_t v, uint32_t xcc_id)
{
	signed long r, cnt = 0;
	unsigned long flags;
	uint32_t seq;
	struct amdgpu_kiq *kiq = &adev->gfx.kiq[xcc_id];
	struct amdgpu_ring *ring = &kiq->ring;

	BUG_ON(!ring->funcs->emit_wreg);

	if (amdgpu_device_skip_hw_access(adev))
		return;

	if (adev->mes.ring[0].sched.ready) {
		amdgpu_mes_wreg(adev, reg, v);
		return;
	}

	/* Ring allocation, emission and commit all happen under the KIQ lock. */
	spin_lock_irqsave(&kiq->ring_lock, flags);
	r = amdgpu_ring_alloc(ring, 32);
	if (r)
		goto failed_unlock;

	amdgpu_ring_emit_wreg(ring, reg, v);
	r = amdgpu_fence_emit_polling(ring, &seq, MAX_KIQ_REG_WAIT);
	if (r)
		goto failed_undo;

	amdgpu_ring_commit(ring);
	spin_unlock_irqrestore(&kiq->ring_lock, flags);

	r = amdgpu_fence_wait_polling(ring, seq, MAX_KIQ_REG_WAIT);

	/* don't wait anymore for gpu reset case because this way may
	 * block gpu_recover() routine forever, e.g. this virt_kiq_rreg
	 * is triggered in TTM and ttm_bo_lock_delayed_workqueue() will
	 * never return if we keep waiting in virt_kiq_rreg, which cause
	 * gpu_recover() hang there.
	 *
	 * also don't wait anymore for IRQ context
	 * */
	if (r < 1 && (amdgpu_in_reset(adev) || in_interrupt()))
		goto failed_kiq_write;

	might_sleep();
	/* Sleep between polls, giving up after MAX_KIQ_REG_TRY retries. */
	while (r < 1 && cnt++ < MAX_KIQ_REG_TRY) {

		drm_msleep(MAX_KIQ_REG_BAILOUT_INTERVAL);
		r = amdgpu_fence_wait_polling(ring, seq, MAX_KIQ_REG_WAIT);
	}

	/* cnt only exceeds the limit when every retry timed out. */
	if (cnt > MAX_KIQ_REG_TRY)
		goto failed_kiq_write;

	return;

failed_undo:
	/* Drop what was emitted on the ring before releasing the lock. */
	amdgpu_ring_undo(ring);
failed_unlock:
	spin_unlock_irqrestore(&kiq->ring_lock, flags);
failed_kiq_write:
	dev_err(adev->dev, "failed to write reg:%x\n", reg);
}
1179
amdgpu_gfx_get_num_kcq(struct amdgpu_device * adev)1180 int amdgpu_gfx_get_num_kcq(struct amdgpu_device *adev)
1181 {
1182 if (amdgpu_num_kcq == -1) {
1183 return 8;
1184 } else if (amdgpu_num_kcq > 8 || amdgpu_num_kcq < 0) {
1185 dev_warn(adev->dev, "set kernel compute queue number to 8 due to invalid parameter provided by user\n");
1186 return 8;
1187 }
1188 return amdgpu_num_kcq;
1189 }
1190
amdgpu_gfx_cp_init_microcode(struct amdgpu_device * adev,uint32_t ucode_id)1191 void amdgpu_gfx_cp_init_microcode(struct amdgpu_device *adev,
1192 uint32_t ucode_id)
1193 {
1194 const struct gfx_firmware_header_v1_0 *cp_hdr;
1195 const struct gfx_firmware_header_v2_0 *cp_hdr_v2_0;
1196 struct amdgpu_firmware_info *info = NULL;
1197 const struct firmware *ucode_fw;
1198 unsigned int fw_size;
1199
1200 switch (ucode_id) {
1201 case AMDGPU_UCODE_ID_CP_PFP:
1202 cp_hdr = (const struct gfx_firmware_header_v1_0 *)
1203 adev->gfx.pfp_fw->data;
1204 adev->gfx.pfp_fw_version =
1205 le32_to_cpu(cp_hdr->header.ucode_version);
1206 adev->gfx.pfp_feature_version =
1207 le32_to_cpu(cp_hdr->ucode_feature_version);
1208 ucode_fw = adev->gfx.pfp_fw;
1209 fw_size = le32_to_cpu(cp_hdr->header.ucode_size_bytes);
1210 break;
1211 case AMDGPU_UCODE_ID_CP_RS64_PFP:
1212 cp_hdr_v2_0 = (const struct gfx_firmware_header_v2_0 *)
1213 adev->gfx.pfp_fw->data;
1214 adev->gfx.pfp_fw_version =
1215 le32_to_cpu(cp_hdr_v2_0->header.ucode_version);
1216 adev->gfx.pfp_feature_version =
1217 le32_to_cpu(cp_hdr_v2_0->ucode_feature_version);
1218 ucode_fw = adev->gfx.pfp_fw;
1219 fw_size = le32_to_cpu(cp_hdr_v2_0->ucode_size_bytes);
1220 break;
1221 case AMDGPU_UCODE_ID_CP_RS64_PFP_P0_STACK:
1222 case AMDGPU_UCODE_ID_CP_RS64_PFP_P1_STACK:
1223 cp_hdr_v2_0 = (const struct gfx_firmware_header_v2_0 *)
1224 adev->gfx.pfp_fw->data;
1225 ucode_fw = adev->gfx.pfp_fw;
1226 fw_size = le32_to_cpu(cp_hdr_v2_0->data_size_bytes);
1227 break;
1228 case AMDGPU_UCODE_ID_CP_ME:
1229 cp_hdr = (const struct gfx_firmware_header_v1_0 *)
1230 adev->gfx.me_fw->data;
1231 adev->gfx.me_fw_version =
1232 le32_to_cpu(cp_hdr->header.ucode_version);
1233 adev->gfx.me_feature_version =
1234 le32_to_cpu(cp_hdr->ucode_feature_version);
1235 ucode_fw = adev->gfx.me_fw;
1236 fw_size = le32_to_cpu(cp_hdr->header.ucode_size_bytes);
1237 break;
1238 case AMDGPU_UCODE_ID_CP_RS64_ME:
1239 cp_hdr_v2_0 = (const struct gfx_firmware_header_v2_0 *)
1240 adev->gfx.me_fw->data;
1241 adev->gfx.me_fw_version =
1242 le32_to_cpu(cp_hdr_v2_0->header.ucode_version);
1243 adev->gfx.me_feature_version =
1244 le32_to_cpu(cp_hdr_v2_0->ucode_feature_version);
1245 ucode_fw = adev->gfx.me_fw;
1246 fw_size = le32_to_cpu(cp_hdr_v2_0->ucode_size_bytes);
1247 break;
1248 case AMDGPU_UCODE_ID_CP_RS64_ME_P0_STACK:
1249 case AMDGPU_UCODE_ID_CP_RS64_ME_P1_STACK:
1250 cp_hdr_v2_0 = (const struct gfx_firmware_header_v2_0 *)
1251 adev->gfx.me_fw->data;
1252 ucode_fw = adev->gfx.me_fw;
1253 fw_size = le32_to_cpu(cp_hdr_v2_0->data_size_bytes);
1254 break;
1255 case AMDGPU_UCODE_ID_CP_CE:
1256 cp_hdr = (const struct gfx_firmware_header_v1_0 *)
1257 adev->gfx.ce_fw->data;
1258 adev->gfx.ce_fw_version =
1259 le32_to_cpu(cp_hdr->header.ucode_version);
1260 adev->gfx.ce_feature_version =
1261 le32_to_cpu(cp_hdr->ucode_feature_version);
1262 ucode_fw = adev->gfx.ce_fw;
1263 fw_size = le32_to_cpu(cp_hdr->header.ucode_size_bytes);
1264 break;
1265 case AMDGPU_UCODE_ID_CP_MEC1:
1266 cp_hdr = (const struct gfx_firmware_header_v1_0 *)
1267 adev->gfx.mec_fw->data;
1268 adev->gfx.mec_fw_version =
1269 le32_to_cpu(cp_hdr->header.ucode_version);
1270 adev->gfx.mec_feature_version =
1271 le32_to_cpu(cp_hdr->ucode_feature_version);
1272 ucode_fw = adev->gfx.mec_fw;
1273 fw_size = le32_to_cpu(cp_hdr->header.ucode_size_bytes) -
1274 le32_to_cpu(cp_hdr->jt_size) * 4;
1275 break;
1276 case AMDGPU_UCODE_ID_CP_MEC1_JT:
1277 cp_hdr = (const struct gfx_firmware_header_v1_0 *)
1278 adev->gfx.mec_fw->data;
1279 ucode_fw = adev->gfx.mec_fw;
1280 fw_size = le32_to_cpu(cp_hdr->jt_size) * 4;
1281 break;
1282 case AMDGPU_UCODE_ID_CP_MEC2:
1283 cp_hdr = (const struct gfx_firmware_header_v1_0 *)
1284 adev->gfx.mec2_fw->data;
1285 adev->gfx.mec2_fw_version =
1286 le32_to_cpu(cp_hdr->header.ucode_version);
1287 adev->gfx.mec2_feature_version =
1288 le32_to_cpu(cp_hdr->ucode_feature_version);
1289 ucode_fw = adev->gfx.mec2_fw;
1290 fw_size = le32_to_cpu(cp_hdr->header.ucode_size_bytes) -
1291 le32_to_cpu(cp_hdr->jt_size) * 4;
1292 break;
1293 case AMDGPU_UCODE_ID_CP_MEC2_JT:
1294 cp_hdr = (const struct gfx_firmware_header_v1_0 *)
1295 adev->gfx.mec2_fw->data;
1296 ucode_fw = adev->gfx.mec2_fw;
1297 fw_size = le32_to_cpu(cp_hdr->jt_size) * 4;
1298 break;
1299 case AMDGPU_UCODE_ID_CP_RS64_MEC:
1300 cp_hdr_v2_0 = (const struct gfx_firmware_header_v2_0 *)
1301 adev->gfx.mec_fw->data;
1302 adev->gfx.mec_fw_version =
1303 le32_to_cpu(cp_hdr_v2_0->header.ucode_version);
1304 adev->gfx.mec_feature_version =
1305 le32_to_cpu(cp_hdr_v2_0->ucode_feature_version);
1306 ucode_fw = adev->gfx.mec_fw;
1307 fw_size = le32_to_cpu(cp_hdr_v2_0->ucode_size_bytes);
1308 break;
1309 case AMDGPU_UCODE_ID_CP_RS64_MEC_P0_STACK:
1310 case AMDGPU_UCODE_ID_CP_RS64_MEC_P1_STACK:
1311 case AMDGPU_UCODE_ID_CP_RS64_MEC_P2_STACK:
1312 case AMDGPU_UCODE_ID_CP_RS64_MEC_P3_STACK:
1313 cp_hdr_v2_0 = (const struct gfx_firmware_header_v2_0 *)
1314 adev->gfx.mec_fw->data;
1315 ucode_fw = adev->gfx.mec_fw;
1316 fw_size = le32_to_cpu(cp_hdr_v2_0->data_size_bytes);
1317 break;
1318 default:
1319 dev_err(adev->dev, "Invalid ucode id %u\n", ucode_id);
1320 return;
1321 }
1322
1323 if (adev->firmware.load_type == AMDGPU_FW_LOAD_PSP) {
1324 info = &adev->firmware.ucode[ucode_id];
1325 info->ucode_id = ucode_id;
1326 info->fw = ucode_fw;
1327 adev->firmware.fw_size += ALIGN(fw_size, PAGE_SIZE);
1328 }
1329 }
1330
amdgpu_gfx_is_master_xcc(struct amdgpu_device * adev,int xcc_id)1331 bool amdgpu_gfx_is_master_xcc(struct amdgpu_device *adev, int xcc_id)
1332 {
1333 return !(xcc_id % (adev->gfx.num_xcc_per_xcp ?
1334 adev->gfx.num_xcc_per_xcp : 1));
1335 }
1336
amdgpu_gfx_get_current_compute_partition(struct device * dev,struct device_attribute * addr,char * buf)1337 static ssize_t amdgpu_gfx_get_current_compute_partition(struct device *dev,
1338 struct device_attribute *addr,
1339 char *buf)
1340 {
1341 struct drm_device *ddev = dev_get_drvdata(dev);
1342 struct amdgpu_device *adev = drm_to_adev(ddev);
1343 int mode;
1344
1345 mode = amdgpu_xcp_query_partition_mode(adev->xcp_mgr,
1346 AMDGPU_XCP_FL_NONE);
1347
1348 return sysfs_emit(buf, "%s\n", amdgpu_gfx_compute_mode_desc(mode));
1349 }
1350
amdgpu_gfx_set_compute_partition(struct device * dev,struct device_attribute * addr,const char * buf,size_t count)1351 static ssize_t amdgpu_gfx_set_compute_partition(struct device *dev,
1352 struct device_attribute *addr,
1353 const char *buf, size_t count)
1354 {
1355 struct drm_device *ddev = dev_get_drvdata(dev);
1356 struct amdgpu_device *adev = drm_to_adev(ddev);
1357 enum amdgpu_gfx_partition mode;
1358 int ret = 0, num_xcc;
1359
1360 num_xcc = NUM_XCC(adev->gfx.xcc_mask);
1361 if (num_xcc % 2 != 0)
1362 return -EINVAL;
1363
1364 if (!strncasecmp("SPX", buf, strlen("SPX"))) {
1365 mode = AMDGPU_SPX_PARTITION_MODE;
1366 } else if (!strncasecmp("DPX", buf, strlen("DPX"))) {
1367 /*
1368 * DPX mode needs AIDs to be in multiple of 2.
1369 * Each AID connects 2 XCCs.
1370 */
1371 if (num_xcc%4)
1372 return -EINVAL;
1373 mode = AMDGPU_DPX_PARTITION_MODE;
1374 } else if (!strncasecmp("TPX", buf, strlen("TPX"))) {
1375 if (num_xcc != 6)
1376 return -EINVAL;
1377 mode = AMDGPU_TPX_PARTITION_MODE;
1378 } else if (!strncasecmp("QPX", buf, strlen("QPX"))) {
1379 if (num_xcc != 8)
1380 return -EINVAL;
1381 mode = AMDGPU_QPX_PARTITION_MODE;
1382 } else if (!strncasecmp("CPX", buf, strlen("CPX"))) {
1383 mode = AMDGPU_CPX_PARTITION_MODE;
1384 } else {
1385 return -EINVAL;
1386 }
1387
1388 ret = amdgpu_xcp_switch_partition_mode(adev->xcp_mgr, mode);
1389
1390 if (ret)
1391 return ret;
1392
1393 return count;
1394 }
1395
amdgpu_gfx_get_available_compute_partition(struct device * dev,struct device_attribute * addr,char * buf)1396 static ssize_t amdgpu_gfx_get_available_compute_partition(struct device *dev,
1397 struct device_attribute *addr,
1398 char *buf)
1399 {
1400 struct drm_device *ddev = dev_get_drvdata(dev);
1401 struct amdgpu_device *adev = drm_to_adev(ddev);
1402 char *supported_partition;
1403
1404 /* TBD */
1405 switch (NUM_XCC(adev->gfx.xcc_mask)) {
1406 case 8:
1407 supported_partition = "SPX, DPX, QPX, CPX";
1408 break;
1409 case 6:
1410 supported_partition = "SPX, TPX, CPX";
1411 break;
1412 case 4:
1413 supported_partition = "SPX, DPX, CPX";
1414 break;
1415 /* this seems only existing in emulation phase */
1416 case 2:
1417 supported_partition = "SPX, CPX";
1418 break;
1419 default:
1420 supported_partition = "Not supported";
1421 break;
1422 }
1423
1424 return sysfs_emit(buf, "%s\n", supported_partition);
1425 }
1426
amdgpu_gfx_run_cleaner_shader_job(struct amdgpu_ring * ring)1427 static int amdgpu_gfx_run_cleaner_shader_job(struct amdgpu_ring *ring)
1428 {
1429 struct amdgpu_device *adev = ring->adev;
1430 struct drm_gpu_scheduler *sched = &ring->sched;
1431 struct drm_sched_entity entity;
1432 struct dma_fence *f;
1433 struct amdgpu_job *job;
1434 struct amdgpu_ib *ib;
1435 int i, r;
1436
1437 /* Initialize the scheduler entity */
1438 r = drm_sched_entity_init(&entity, DRM_SCHED_PRIORITY_NORMAL,
1439 &sched, 1, NULL);
1440 if (r) {
1441 dev_err(adev->dev, "Failed setting up GFX kernel entity.\n");
1442 goto err;
1443 }
1444
1445 r = amdgpu_job_alloc_with_ib(ring->adev, &entity, NULL,
1446 64, 0,
1447 &job);
1448 if (r)
1449 goto err;
1450
1451 job->enforce_isolation = true;
1452
1453 ib = &job->ibs[0];
1454 for (i = 0; i <= ring->funcs->align_mask; ++i)
1455 ib->ptr[i] = ring->funcs->nop;
1456 ib->length_dw = ring->funcs->align_mask + 1;
1457
1458 f = amdgpu_job_submit(job);
1459
1460 r = dma_fence_wait(f, false);
1461 if (r)
1462 goto err;
1463
1464 dma_fence_put(f);
1465
1466 /* Clean up the scheduler entity */
1467 drm_sched_entity_destroy(&entity);
1468 return 0;
1469
1470 err:
1471 return r;
1472 }
1473
amdgpu_gfx_run_cleaner_shader(struct amdgpu_device * adev,int xcp_id)1474 static int amdgpu_gfx_run_cleaner_shader(struct amdgpu_device *adev, int xcp_id)
1475 {
1476 int num_xcc = NUM_XCC(adev->gfx.xcc_mask);
1477 struct amdgpu_ring *ring;
1478 int num_xcc_to_clear;
1479 int i, r, xcc_id;
1480
1481 if (adev->gfx.num_xcc_per_xcp)
1482 num_xcc_to_clear = adev->gfx.num_xcc_per_xcp;
1483 else
1484 num_xcc_to_clear = 1;
1485
1486 for (xcc_id = 0; xcc_id < num_xcc; xcc_id++) {
1487 for (i = 0; i < adev->gfx.num_compute_rings; i++) {
1488 ring = &adev->gfx.compute_ring[i + xcc_id * adev->gfx.num_compute_rings];
1489 if ((ring->xcp_id == xcp_id) && ring->sched.ready) {
1490 r = amdgpu_gfx_run_cleaner_shader_job(ring);
1491 if (r)
1492 return r;
1493 num_xcc_to_clear--;
1494 break;
1495 }
1496 }
1497 }
1498
1499 if (num_xcc_to_clear)
1500 return -ENOENT;
1501
1502 return 0;
1503 }
1504
/*
 * sysfs store handler for run_cleaner_shader.
 *
 * In this build the handler is a stub: it calls STUB() and returns -ENOSYS.
 * The implementation kept under "#ifdef notyet" sits after that return.
 * It parses a partition id from @buf, validates it, takes a runtime PM
 * reference, runs the cleaner shader for that partition and returns @count
 * on success.
 */
static ssize_t amdgpu_gfx_set_run_cleaner_shader(struct device *dev,
						 struct device_attribute *attr,
						 const char *buf,
						 size_t count)
{
	STUB();
	return -ENOSYS;
#ifdef notyet
	struct drm_device *ddev = dev_get_drvdata(dev);
	struct amdgpu_device *adev = drm_to_adev(ddev);
	int ret;
	long value;

	/* Refuse while a reset is in progress or the device is suspended
	 * for a reason other than runtime PM.
	 */
	if (amdgpu_in_reset(adev))
		return -EPERM;
	if (adev->in_suspend && !adev->in_runpm)
		return -EPERM;

	ret = kstrtol(buf, 0, &value);

	if (ret)
		return -EINVAL;

	if (value < 0)
		return -EINVAL;

	/* With an XCP manager the id must name an existing partition;
	 * without one only 0 and 1 are accepted.
	 */
	if (adev->xcp_mgr) {
		if (value >= adev->xcp_mgr->num_xcps)
			return -EINVAL;
	} else {
		if (value > 1)
			return -EINVAL;
	}

	ret = pm_runtime_get_sync(ddev->dev);
	if (ret < 0) {
		/* Balance the get even though it reported failure. */
		pm_runtime_put_autosuspend(ddev->dev);
		return ret;
	}

	ret = amdgpu_gfx_run_cleaner_shader(adev, value);

	pm_runtime_mark_last_busy(ddev->dev);
	pm_runtime_put_autosuspend(ddev->dev);

	if (ret)
		return ret;

	return count;
#endif
}
1556
amdgpu_gfx_get_enforce_isolation(struct device * dev,struct device_attribute * attr,char * buf)1557 static ssize_t amdgpu_gfx_get_enforce_isolation(struct device *dev,
1558 struct device_attribute *attr,
1559 char *buf)
1560 {
1561 struct drm_device *ddev = dev_get_drvdata(dev);
1562 struct amdgpu_device *adev = drm_to_adev(ddev);
1563 int i;
1564 ssize_t size = 0;
1565
1566 if (adev->xcp_mgr) {
1567 for (i = 0; i < adev->xcp_mgr->num_xcps; i++) {
1568 size += sysfs_emit_at(buf, size, "%u", adev->enforce_isolation[i]);
1569 if (i < (adev->xcp_mgr->num_xcps - 1))
1570 size += sysfs_emit_at(buf, size, " ");
1571 }
1572 buf[size++] = '\n';
1573 } else {
1574 size = sysfs_emit_at(buf, 0, "%u\n", adev->enforce_isolation[0]);
1575 }
1576
1577 return size;
1578 }
1579
/*
 * sysfs store handler for enforce_isolation.
 *
 * In this build the handler is a stub: it calls STUB() and returns -ENOSYS.
 * The implementation kept under "#ifdef notyet" sits after that return.
 * It parses one 0/1 value per partition from @buf, separated by spaces,
 * and applies them under enforce_isolation_mutex, reserving or freeing the
 * VMID for each partition whose setting changes.
 */
static ssize_t amdgpu_gfx_set_enforce_isolation(struct device *dev,
						struct device_attribute *attr,
						const char *buf, size_t count)
{
	STUB();
	return -ENOSYS;
#ifdef notyet
	struct drm_device *ddev = dev_get_drvdata(dev);
	struct amdgpu_device *adev = drm_to_adev(ddev);
	long partition_values[MAX_XCP] = {0};
	int ret, i, num_partitions;
	const char *input_buf = buf;

	/* Parse at most one value per partition (one value without XCP). */
	for (i = 0; i < (adev->xcp_mgr ? adev->xcp_mgr->num_xcps : 1); i++) {
		ret = sscanf(input_buf, "%ld", &partition_values[i]);
		if (ret <= 0)
			break;

		/* Move the pointer to the next value in the string */
		input_buf = strchr(input_buf, ' ');
		if (input_buf) {
			input_buf++;
		} else {
			/* Last value: count it before leaving the loop. */
			i++;
			break;
		}
	}
	num_partitions = i;

	/* The number of parsed values must match the partition count. */
	if (adev->xcp_mgr && num_partitions != adev->xcp_mgr->num_xcps)
		return -EINVAL;

	if (!adev->xcp_mgr && num_partitions != 1)
		return -EINVAL;

	/* Only 0 and 1 are valid settings. */
	for (i = 0; i < num_partitions; i++) {
		if (partition_values[i] != 0 && partition_values[i] != 1)
			return -EINVAL;
	}

	mutex_lock(&adev->enforce_isolation_mutex);

	for (i = 0; i < num_partitions; i++) {
		if (adev->enforce_isolation[i] && !partition_values[i]) {
			/* Going from enabled to disabled */
			amdgpu_vmid_free_reserved(adev, AMDGPU_GFXHUB(i));
		} else if (!adev->enforce_isolation[i] && partition_values[i]) {
			/* Going from disabled to enabled */
			amdgpu_vmid_alloc_reserved(adev, AMDGPU_GFXHUB(i));
		}
		adev->enforce_isolation[i] = partition_values[i];
	}

	mutex_unlock(&adev->enforce_isolation_mutex);

	return count;
#endif
}
1638
/* run_cleaner_shader: mode 0200, store handler only. */
static DEVICE_ATTR(run_cleaner_shader, 0200,
		   NULL, amdgpu_gfx_set_run_cleaner_shader);

/* enforce_isolation: mode 0644, show and store handlers. */
static DEVICE_ATTR(enforce_isolation, 0644,
		   amdgpu_gfx_get_enforce_isolation,
		   amdgpu_gfx_set_enforce_isolation);

/* current_compute_partition: mode 0644, show and store handlers. */
static DEVICE_ATTR(current_compute_partition, 0644,
		   amdgpu_gfx_get_current_compute_partition,
		   amdgpu_gfx_set_compute_partition);

/* available_compute_partition: mode 0444, show handler only. */
static DEVICE_ATTR(available_compute_partition, 0444,
		   amdgpu_gfx_get_available_compute_partition, NULL);
1652
amdgpu_gfx_sysfs_init(struct amdgpu_device * adev)1653 int amdgpu_gfx_sysfs_init(struct amdgpu_device *adev)
1654 {
1655 int r;
1656
1657 r = device_create_file(adev->dev, &dev_attr_current_compute_partition);
1658 if (r)
1659 return r;
1660
1661 r = device_create_file(adev->dev, &dev_attr_available_compute_partition);
1662
1663 return r;
1664 }
1665
/**
 * amdgpu_gfx_sysfs_fini - remove the compute partition sysfs attributes
 * @adev: amdgpu_device pointer
 *
 * Removes current_compute_partition and available_compute_partition from
 * the device.
 */
void amdgpu_gfx_sysfs_fini(struct amdgpu_device *adev)
{
	device_remove_file(adev->dev, &dev_attr_current_compute_partition);
	device_remove_file(adev->dev, &dev_attr_available_compute_partition);
}
1671
amdgpu_gfx_sysfs_isolation_shader_init(struct amdgpu_device * adev)1672 int amdgpu_gfx_sysfs_isolation_shader_init(struct amdgpu_device *adev)
1673 {
1674 int r;
1675
1676 r = device_create_file(adev->dev, &dev_attr_enforce_isolation);
1677 if (r)
1678 return r;
1679
1680 r = device_create_file(adev->dev, &dev_attr_run_cleaner_shader);
1681 if (r)
1682 return r;
1683
1684 return 0;
1685 }
1686
/**
 * amdgpu_gfx_sysfs_isolation_shader_fini - remove the isolation attributes
 * @adev: amdgpu_device pointer
 *
 * Removes enforce_isolation and run_cleaner_shader from the device.
 */
void amdgpu_gfx_sysfs_isolation_shader_fini(struct amdgpu_device *adev)
{
	device_remove_file(adev->dev, &dev_attr_enforce_isolation);
	device_remove_file(adev->dev, &dev_attr_run_cleaner_shader);
}
1692
amdgpu_gfx_cleaner_shader_sw_init(struct amdgpu_device * adev,unsigned int cleaner_shader_size)1693 int amdgpu_gfx_cleaner_shader_sw_init(struct amdgpu_device *adev,
1694 unsigned int cleaner_shader_size)
1695 {
1696 if (!adev->gfx.enable_cleaner_shader)
1697 return -EOPNOTSUPP;
1698
1699 return amdgpu_bo_create_kernel(adev, cleaner_shader_size, PAGE_SIZE,
1700 AMDGPU_GEM_DOMAIN_VRAM | AMDGPU_GEM_DOMAIN_GTT,
1701 &adev->gfx.cleaner_shader_obj,
1702 &adev->gfx.cleaner_shader_gpu_addr,
1703 (void **)&adev->gfx.cleaner_shader_cpu_ptr);
1704 }
1705
amdgpu_gfx_cleaner_shader_sw_fini(struct amdgpu_device * adev)1706 void amdgpu_gfx_cleaner_shader_sw_fini(struct amdgpu_device *adev)
1707 {
1708 if (!adev->gfx.enable_cleaner_shader)
1709 return;
1710
1711 amdgpu_bo_free_kernel(&adev->gfx.cleaner_shader_obj,
1712 &adev->gfx.cleaner_shader_gpu_addr,
1713 (void **)&adev->gfx.cleaner_shader_cpu_ptr);
1714 }
1715
amdgpu_gfx_cleaner_shader_init(struct amdgpu_device * adev,unsigned int cleaner_shader_size,const void * cleaner_shader_ptr)1716 void amdgpu_gfx_cleaner_shader_init(struct amdgpu_device *adev,
1717 unsigned int cleaner_shader_size,
1718 const void *cleaner_shader_ptr)
1719 {
1720 if (!adev->gfx.enable_cleaner_shader)
1721 return;
1722
1723 if (adev->gfx.cleaner_shader_cpu_ptr && cleaner_shader_ptr)
1724 memcpy_toio(adev->gfx.cleaner_shader_cpu_ptr, cleaner_shader_ptr,
1725 cleaner_shader_size);
1726 }
1727
1728 /**
1729 * amdgpu_gfx_kfd_sch_ctrl - Control the KFD scheduler from the KGD (Graphics Driver)
1730 * @adev: amdgpu_device pointer
1731 * @idx: Index of the scheduler to control
1732 * @enable: Whether to enable or disable the KFD scheduler
1733 *
1734 * This function is used to control the KFD (Kernel Fusion Driver) scheduler
1735 * from the KGD. It is part of the cleaner shader feature. This function plays
1736 * a key role in enforcing process isolation on the GPU.
1737 *
1738 * The function uses a reference count mechanism (kfd_sch_req_count) to keep
1739 * track of the number of requests to enable the KFD scheduler. When a request
1740 * to enable the KFD scheduler is made, the reference count is decremented.
 * When the reference count reaches zero while the KFD scheduler is marked
 * inactive, a delayed work is scheduled to enforce isolation after a delay
 * of GFX_SLICE_PERIOD.
1743 *
1744 * When a request to disable the KFD scheduler is made, the function first
1745 * checks if the reference count is zero. If it is, it cancels the delayed work
1746 * for enforcing isolation and checks if the KFD scheduler is active. If the
1747 * KFD scheduler is active, it sends a request to stop the KFD scheduler and
1748 * sets the KFD scheduler state to inactive. Then, it increments the reference
1749 * count.
1750 *
1751 * The function is synchronized using the kfd_sch_mutex to ensure that the KFD
1752 * scheduler state and reference count are updated atomically.
1753 *
1754 * Note: If the reference count is already zero when a request to enable the
1755 * KFD scheduler is made, it means there's an imbalance bug somewhere. The
1756 * function triggers a warning in this case.
1757 */
static void amdgpu_gfx_kfd_sch_ctrl(struct amdgpu_device *adev, u32 idx,
				    bool enable)
{
	/* Count and inactive flag for @idx are updated under this mutex. */
	mutex_lock(&adev->gfx.kfd_sch_mutex);

	if (enable) {
		/* If the count is already 0, it means there's an imbalance bug somewhere.
		 * Note that the bug may be in a different caller than the one which triggers the
		 * WARN_ON_ONCE.
		 */
		if (WARN_ON_ONCE(adev->gfx.kfd_sch_req_count[idx] == 0)) {
			dev_err(adev->dev, "Attempted to enable KFD scheduler when reference count is already zero\n");
			goto unlock;
		}

		adev->gfx.kfd_sch_req_count[idx]--;

		/* Last request dropped while the scheduler is stopped: arm
		 * the delayed work for this index.
		 */
		if (adev->gfx.kfd_sch_req_count[idx] == 0 &&
		    adev->gfx.kfd_sch_inactive[idx]) {
			schedule_delayed_work(&adev->gfx.enforce_isolation[idx].work,
					      GFX_SLICE_PERIOD);
		}
	} else {
		/* First request: cancel pending work and stop the KFD
		 * scheduler if it is still running.
		 */
		if (adev->gfx.kfd_sch_req_count[idx] == 0) {
			/* NOTE(review): callers in this file hold
			 * adev->enforce_isolation_mutex here, and the work
			 * handler takes that same mutex; waiting for a
			 * running handler could therefore block - confirm
			 * this cannot deadlock.
			 */
			cancel_delayed_work_sync(&adev->gfx.enforce_isolation[idx].work);
			if (!adev->gfx.kfd_sch_inactive[idx]) {
				amdgpu_amdkfd_stop_sched(adev, idx);
				adev->gfx.kfd_sch_inactive[idx] = true;
			}
		}

		adev->gfx.kfd_sch_req_count[idx]++;
	}

unlock:
	mutex_unlock(&adev->gfx.kfd_sch_mutex);
}
1795
1796 /**
1797 * amdgpu_gfx_enforce_isolation_handler - work handler for enforcing shader isolation
1798 *
1799 * @work: work_struct.
1800 *
1801 * This function is the work handler for enforcing shader isolation on AMD GPUs.
1802 * It counts the number of emitted fences for each GFX and compute ring. If there
1803 * are any fences, it schedules the `enforce_isolation_work` to be run after a
1804 * delay of `GFX_SLICE_PERIOD`. If there are no fences, it signals the Kernel Fusion
1805 * Driver (KFD) to resume the runqueue. The function is synchronized using the
1806 * `enforce_isolation_mutex`.
1807 */
amdgpu_gfx_enforce_isolation_handler(struct work_struct * work)1808 void amdgpu_gfx_enforce_isolation_handler(struct work_struct *work)
1809 {
1810 struct amdgpu_isolation_work *isolation_work =
1811 container_of(work, struct amdgpu_isolation_work, work.work);
1812 struct amdgpu_device *adev = isolation_work->adev;
1813 u32 i, idx, fences = 0;
1814
1815 if (isolation_work->xcp_id == AMDGPU_XCP_NO_PARTITION)
1816 idx = 0;
1817 else
1818 idx = isolation_work->xcp_id;
1819
1820 if (idx >= MAX_XCP)
1821 return;
1822
1823 mutex_lock(&adev->enforce_isolation_mutex);
1824 for (i = 0; i < AMDGPU_MAX_GFX_RINGS; ++i) {
1825 if (isolation_work->xcp_id == adev->gfx.gfx_ring[i].xcp_id)
1826 fences += amdgpu_fence_count_emitted(&adev->gfx.gfx_ring[i]);
1827 }
1828 for (i = 0; i < (AMDGPU_MAX_COMPUTE_RINGS * AMDGPU_MAX_GC_INSTANCES); ++i) {
1829 if (isolation_work->xcp_id == adev->gfx.compute_ring[i].xcp_id)
1830 fences += amdgpu_fence_count_emitted(&adev->gfx.compute_ring[i]);
1831 }
1832 if (fences) {
1833 schedule_delayed_work(&adev->gfx.enforce_isolation[idx].work,
1834 GFX_SLICE_PERIOD);
1835 } else {
1836 /* Tell KFD to resume the runqueue */
1837 if (adev->kfd.init_complete) {
1838 WARN_ON_ONCE(!adev->gfx.kfd_sch_inactive[idx]);
1839 WARN_ON_ONCE(adev->gfx.kfd_sch_req_count[idx]);
1840 amdgpu_amdkfd_start_sched(adev, idx);
1841 adev->gfx.kfd_sch_inactive[idx] = false;
1842 }
1843 }
1844 mutex_unlock(&adev->enforce_isolation_mutex);
1845 }
1846
amdgpu_gfx_enforce_isolation_ring_begin_use(struct amdgpu_ring * ring)1847 void amdgpu_gfx_enforce_isolation_ring_begin_use(struct amdgpu_ring *ring)
1848 {
1849 struct amdgpu_device *adev = ring->adev;
1850 u32 idx;
1851
1852 if (!adev->gfx.enable_cleaner_shader)
1853 return;
1854
1855 if (ring->xcp_id == AMDGPU_XCP_NO_PARTITION)
1856 idx = 0;
1857 else
1858 idx = ring->xcp_id;
1859
1860 if (idx >= MAX_XCP)
1861 return;
1862
1863 mutex_lock(&adev->enforce_isolation_mutex);
1864 if (adev->enforce_isolation[idx]) {
1865 if (adev->kfd.init_complete)
1866 amdgpu_gfx_kfd_sch_ctrl(adev, idx, false);
1867 }
1868 mutex_unlock(&adev->enforce_isolation_mutex);
1869 }
1870
amdgpu_gfx_enforce_isolation_ring_end_use(struct amdgpu_ring * ring)1871 void amdgpu_gfx_enforce_isolation_ring_end_use(struct amdgpu_ring *ring)
1872 {
1873 struct amdgpu_device *adev = ring->adev;
1874 u32 idx;
1875
1876 if (!adev->gfx.enable_cleaner_shader)
1877 return;
1878
1879 if (ring->xcp_id == AMDGPU_XCP_NO_PARTITION)
1880 idx = 0;
1881 else
1882 idx = ring->xcp_id;
1883
1884 if (idx >= MAX_XCP)
1885 return;
1886
1887 mutex_lock(&adev->enforce_isolation_mutex);
1888 if (adev->enforce_isolation[idx]) {
1889 if (adev->kfd.init_complete)
1890 amdgpu_gfx_kfd_sch_ctrl(adev, idx, true);
1891 }
1892 mutex_unlock(&adev->enforce_isolation_mutex);
1893 }
1894