xref: /dragonfly/sys/dev/drm/radeon/cik.c (revision 3f2dd94a569761201b5b0a18b2f697f97fe1b9dc)
1 /*
2  * Copyright 2012 Advanced Micro Devices, Inc.
3  *
4  * Permission is hereby granted, free of charge, to any person obtaining a
5  * copy of this software and associated documentation files (the "Software"),
6  * to deal in the Software without restriction, including without limitation
7  * the rights to use, copy, modify, merge, publish, distribute, sublicense,
8  * and/or sell copies of the Software, and to permit persons to whom the
9  * Software is furnished to do so, subject to the following conditions:
10  *
11  * The above copyright notice and this permission notice shall be included in
12  * all copies or substantial portions of the Software.
13  *
14  * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
15  * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
16  * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT.  IN NO EVENT SHALL
17  * THE COPYRIGHT HOLDER(S) OR AUTHOR(S) BE LIABLE FOR ANY CLAIM, DAMAGES OR
18  * OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE,
19  * ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR
20  * OTHER DEALINGS IN THE SOFTWARE.
21  *
22  * Authors: Alex Deucher
23  */
24 #include <linux/firmware.h>
25 #include <linux/slab.h>
26 #include <linux/module.h>
27 #include <drm/drmP.h>
28 #include "radeon.h"
29 #include "radeon_asic.h"
30 #include "radeon_audio.h"
31 #include "cikd.h"
32 #include "atom.h"
33 #include "cik_blit_shaders.h"
34 #include "radeon_ucode.h"
35 #include "clearstate_ci.h"
36 
/*
 * Default SH_MEM_CONFIG value: only the alignment-mode field is set,
 * selecting SH_MEM_ALIGNMENT_MODE_UNALIGNED.
 * NOTE(review): "GFX" presumably means this is the graphics-side
 * default -- confirm at the use sites.
 */
37 #define SH_MEM_CONFIG_GFX_DEFAULT \
38           ALIGNMENT_MODE(SH_MEM_ALIGNMENT_MODE_UNALIGNED)
39 
/*
 * Firmware images advertised by this module.
 *
 * Each ASIC (Bonaire, Hawaii, Kaveri, Kabini, Mullins) is listed under
 * two file-name schemes: an upper-case set and a lower-case set.
 * Differences visible in the lists below:
 *   - the lower-case Bonaire and Hawaii sets add a "k_smc" image and
 *     carry no "mc2" image;
 *   - the lower-case Kaveri set adds a "mec2" image.
 *
 * NOTE(review): presumably the two schemes correspond to two firmware
 * packaging formats handled by the microcode loader -- confirm there
 * before adding or removing entries.
 */
40 MODULE_FIRMWARE("radeon/BONAIRE_pfp.bin");
41 MODULE_FIRMWARE("radeon/BONAIRE_me.bin");
42 MODULE_FIRMWARE("radeon/BONAIRE_ce.bin");
43 MODULE_FIRMWARE("radeon/BONAIRE_mec.bin");
44 MODULE_FIRMWARE("radeon/BONAIRE_mc.bin");
45 MODULE_FIRMWARE("radeon/BONAIRE_mc2.bin");
46 MODULE_FIRMWARE("radeon/BONAIRE_rlc.bin");
47 MODULE_FIRMWARE("radeon/BONAIRE_sdma.bin");
48 MODULE_FIRMWARE("radeon/BONAIRE_smc.bin");
49 
50 MODULE_FIRMWARE("radeon/bonaire_pfp.bin");
51 MODULE_FIRMWARE("radeon/bonaire_me.bin");
52 MODULE_FIRMWARE("radeon/bonaire_ce.bin");
53 MODULE_FIRMWARE("radeon/bonaire_mec.bin");
54 MODULE_FIRMWARE("radeon/bonaire_mc.bin");
55 MODULE_FIRMWARE("radeon/bonaire_rlc.bin");
56 MODULE_FIRMWARE("radeon/bonaire_sdma.bin");
57 MODULE_FIRMWARE("radeon/bonaire_smc.bin");
58 MODULE_FIRMWARE("radeon/bonaire_k_smc.bin");
59 
60 MODULE_FIRMWARE("radeon/HAWAII_pfp.bin");
61 MODULE_FIRMWARE("radeon/HAWAII_me.bin");
62 MODULE_FIRMWARE("radeon/HAWAII_ce.bin");
63 MODULE_FIRMWARE("radeon/HAWAII_mec.bin");
64 MODULE_FIRMWARE("radeon/HAWAII_mc.bin");
65 MODULE_FIRMWARE("radeon/HAWAII_mc2.bin");
66 MODULE_FIRMWARE("radeon/HAWAII_rlc.bin");
67 MODULE_FIRMWARE("radeon/HAWAII_sdma.bin");
68 MODULE_FIRMWARE("radeon/HAWAII_smc.bin");
69 
70 MODULE_FIRMWARE("radeon/hawaii_pfp.bin");
71 MODULE_FIRMWARE("radeon/hawaii_me.bin");
72 MODULE_FIRMWARE("radeon/hawaii_ce.bin");
73 MODULE_FIRMWARE("radeon/hawaii_mec.bin");
74 MODULE_FIRMWARE("radeon/hawaii_mc.bin");
75 MODULE_FIRMWARE("radeon/hawaii_rlc.bin");
76 MODULE_FIRMWARE("radeon/hawaii_sdma.bin");
77 MODULE_FIRMWARE("radeon/hawaii_smc.bin");
78 MODULE_FIRMWARE("radeon/hawaii_k_smc.bin");
79 
80 MODULE_FIRMWARE("radeon/KAVERI_pfp.bin");
81 MODULE_FIRMWARE("radeon/KAVERI_me.bin");
82 MODULE_FIRMWARE("radeon/KAVERI_ce.bin");
83 MODULE_FIRMWARE("radeon/KAVERI_mec.bin");
84 MODULE_FIRMWARE("radeon/KAVERI_rlc.bin");
85 MODULE_FIRMWARE("radeon/KAVERI_sdma.bin");
86 
87 MODULE_FIRMWARE("radeon/kaveri_pfp.bin");
88 MODULE_FIRMWARE("radeon/kaveri_me.bin");
89 MODULE_FIRMWARE("radeon/kaveri_ce.bin");
90 MODULE_FIRMWARE("radeon/kaveri_mec.bin");
91 MODULE_FIRMWARE("radeon/kaveri_mec2.bin");
92 MODULE_FIRMWARE("radeon/kaveri_rlc.bin");
93 MODULE_FIRMWARE("radeon/kaveri_sdma.bin");
94 
95 MODULE_FIRMWARE("radeon/KABINI_pfp.bin");
96 MODULE_FIRMWARE("radeon/KABINI_me.bin");
97 MODULE_FIRMWARE("radeon/KABINI_ce.bin");
98 MODULE_FIRMWARE("radeon/KABINI_mec.bin");
99 MODULE_FIRMWARE("radeon/KABINI_rlc.bin");
100 MODULE_FIRMWARE("radeon/KABINI_sdma.bin");
101 
102 MODULE_FIRMWARE("radeon/kabini_pfp.bin");
103 MODULE_FIRMWARE("radeon/kabini_me.bin");
104 MODULE_FIRMWARE("radeon/kabini_ce.bin");
105 MODULE_FIRMWARE("radeon/kabini_mec.bin");
106 MODULE_FIRMWARE("radeon/kabini_rlc.bin");
107 MODULE_FIRMWARE("radeon/kabini_sdma.bin");
108 
109 MODULE_FIRMWARE("radeon/MULLINS_pfp.bin");
110 MODULE_FIRMWARE("radeon/MULLINS_me.bin");
111 MODULE_FIRMWARE("radeon/MULLINS_ce.bin");
112 MODULE_FIRMWARE("radeon/MULLINS_mec.bin");
113 MODULE_FIRMWARE("radeon/MULLINS_rlc.bin");
114 MODULE_FIRMWARE("radeon/MULLINS_sdma.bin");
115 
116 MODULE_FIRMWARE("radeon/mullins_pfp.bin");
117 MODULE_FIRMWARE("radeon/mullins_me.bin");
118 MODULE_FIRMWARE("radeon/mullins_ce.bin");
119 MODULE_FIRMWARE("radeon/mullins_mec.bin");
120 MODULE_FIRMWARE("radeon/mullins_rlc.bin");
121 MODULE_FIRMWARE("radeon/mullins_sdma.bin");
122 
/*
 * Prototypes for file-local (static) helpers, declared up front so they
 * can be called ahead of their definitions (presumably further down in
 * this file).
 */
123 static u32 cik_get_cu_active_bitmap(struct radeon_device *rdev, u32 se, u32 sh);
124 static void cik_rlc_stop(struct radeon_device *rdev);
125 static void cik_pcie_gen3_enable(struct radeon_device *rdev);
126 static void cik_program_aspm(struct radeon_device *rdev);
127 static void cik_init_pg(struct radeon_device *rdev);
128 static void cik_init_cg(struct radeon_device *rdev);
129 static void cik_fini_pg(struct radeon_device *rdev);
130 static void cik_fini_cg(struct radeon_device *rdev);
131 static void cik_enable_gui_idle_interrupt(struct radeon_device *rdev,
132                                                     bool enable);
133 
134 /**
135  * cik_get_allowed_info_register - fetch the register for the info ioctl
136  *
137  * @rdev: radeon_device pointer
138  * @reg: register offset in bytes
139  * @val: register value
140  *
141  * Returns 0 for success or -EINVAL for an invalid register
142  *
143  */
cik_get_allowed_info_register(struct radeon_device * rdev,u32 reg,u32 * val)144 int cik_get_allowed_info_register(struct radeon_device *rdev,
145                                           u32 reg, u32 *val)
146 {
147           switch (reg) {
148           case GRBM_STATUS:
149           case GRBM_STATUS2:
150           case GRBM_STATUS_SE0:
151           case GRBM_STATUS_SE1:
152           case GRBM_STATUS_SE2:
153           case GRBM_STATUS_SE3:
154           case SRBM_STATUS:
155           case SRBM_STATUS2:
156           case (SDMA0_STATUS_REG + SDMA0_REGISTER_OFFSET):
157           case (SDMA0_STATUS_REG + SDMA1_REGISTER_OFFSET):
158           case UVD_STATUS:
159           /* TODO VCE */
160                     *val = RREG32(reg);
161                     return 0;
162           default:
163                     return -EINVAL;
164           }
165 }
166 
167 /*
168  * Indirect registers accessor
169  */
/*
 * cik_didt_rreg - read a register through the DIDT index/data pair
 *
 * Writes @reg to CIK_DIDT_IND_INDEX, then reads CIK_DIDT_IND_DATA.
 * Both accesses happen under didt_idx_lock with interrupts saved and
 * disabled, so the index cannot be changed between the two steps by
 * another holder of that lock.
 *
 * Returns the value read from the data register.
 */
u32 cik_didt_rreg(struct radeon_device *rdev, u32 reg)
{
	unsigned long irq_state;
	u32 value;

	spin_lock_irqsave(&rdev->didt_idx_lock, irq_state);
	WREG32(CIK_DIDT_IND_INDEX, reg);
	value = RREG32(CIK_DIDT_IND_DATA);
	spin_unlock_irqrestore(&rdev->didt_idx_lock, irq_state);

	return value;
}
181 
/*
 * cik_didt_wreg - write a register through the DIDT index/data pair
 *
 * Writes @reg to CIK_DIDT_IND_INDEX, then writes @v to
 * CIK_DIDT_IND_DATA, both under didt_idx_lock with interrupts saved and
 * disabled.
 */
void cik_didt_wreg(struct radeon_device *rdev, u32 reg, u32 v)
{
	unsigned long irq_state;

	spin_lock_irqsave(&rdev->didt_idx_lock, irq_state);
	WREG32(CIK_DIDT_IND_INDEX, reg);
	WREG32(CIK_DIDT_IND_DATA, v);
	spin_unlock_irqrestore(&rdev->didt_idx_lock, irq_state);
}
191 
192 /* get temperature in millidegrees */
ci_get_temp(struct radeon_device * rdev)193 int ci_get_temp(struct radeon_device *rdev)
194 {
195           u32 temp;
196           int actual_temp = 0;
197 
198           temp = (RREG32_SMC(CG_MULT_THERMAL_STATUS) & CTF_TEMP_MASK) >>
199                     CTF_TEMP_SHIFT;
200 
201           if (temp & 0x200)
202                     actual_temp = 255;
203           else
204                     actual_temp = temp & 0x1ff;
205 
206           actual_temp = actual_temp * 1000;
207 
208           return actual_temp;
209 }
210 
211 /* get temperature in millidegrees */
kv_get_temp(struct radeon_device * rdev)212 int kv_get_temp(struct radeon_device *rdev)
213 {
214           u32 temp;
215           int actual_temp = 0;
216 
217           temp = RREG32_SMC(0xC0300E0C);
218 
219           if (temp)
220                     actual_temp = (temp / 8) - 49;
221           else
222                     actual_temp = 0;
223 
224           actual_temp = actual_temp * 1000;
225 
226           return actual_temp;
227 }
228 
229 /*
230  * Indirect registers accessor
231  */
/*
 * cik_pciep_rreg - read a register through the PCIE index/data pair
 *
 * Under pciep_idx_lock (interrupts saved and disabled): writes @reg to
 * PCIE_INDEX, reads PCIE_INDEX back and discards the result, then reads
 * PCIE_DATA.
 * NOTE(review): the discarded read-back presumably forces the index
 * write to reach the device before the data access -- confirm.
 *
 * Returns the value read from PCIE_DATA.
 */
u32 cik_pciep_rreg(struct radeon_device *rdev, u32 reg)
{
	unsigned long irq_state;
	u32 value;

	spin_lock_irqsave(&rdev->pciep_idx_lock, irq_state);
	WREG32(PCIE_INDEX, reg);
	(void)RREG32(PCIE_INDEX);
	value = RREG32(PCIE_DATA);
	spin_unlock_irqrestore(&rdev->pciep_idx_lock, irq_state);

	return value;
}
244 
/*
 * cik_pciep_wreg - write a register through the PCIE index/data pair
 *
 * Under pciep_idx_lock (interrupts saved and disabled): writes @reg to
 * PCIE_INDEX and reads it back, then writes @v to PCIE_DATA and reads
 * that back.  Both read-back values are discarded.
 * NOTE(review): the read-backs presumably force each write to reach
 * the device before the next step -- confirm.
 */
void cik_pciep_wreg(struct radeon_device *rdev, u32 reg, u32 v)
{
	unsigned long irq_state;

	spin_lock_irqsave(&rdev->pciep_idx_lock, irq_state);
	WREG32(PCIE_INDEX, reg);
	(void)RREG32(PCIE_INDEX);
	WREG32(PCIE_DATA, v);
	(void)RREG32(PCIE_DATA);
	spin_unlock_irqrestore(&rdev->pciep_idx_lock, irq_state);
}
256 
/*
 * Register list named for RLC save/restore on "spectre" parts.
 *
 * Layout as written below: most entries are a pair of words, the first
 * being (selector << 16) | (byte offset >> 2) and the second a
 * 0x00000000 placeholder.  Two bare words (0x3 and 0x5) sit between
 * groups of entries, and the five entries after 0x5 have no
 * placeholder word.
 *
 * NOTE(review): the meaning of the selector field and of the 0x3/0x5
 * words, and the consumer of this table, are not visible here --
 * confirm against the RLC setup code before editing or reordering
 * entries.
 */
257 static const u32 spectre_rlc_save_restore_register_list[] =
258 {
259           (0x0e00 << 16) | (0xc12c >> 2),
260           0x00000000,
261           (0x0e00 << 16) | (0xc140 >> 2),
262           0x00000000,
263           (0x0e00 << 16) | (0xc150 >> 2),
264           0x00000000,
265           (0x0e00 << 16) | (0xc15c >> 2),
266           0x00000000,
267           (0x0e00 << 16) | (0xc168 >> 2),
268           0x00000000,
269           (0x0e00 << 16) | (0xc170 >> 2),
270           0x00000000,
271           (0x0e00 << 16) | (0xc178 >> 2),
272           0x00000000,
273           (0x0e00 << 16) | (0xc204 >> 2),
274           0x00000000,
275           (0x0e00 << 16) | (0xc2b4 >> 2),
276           0x00000000,
277           (0x0e00 << 16) | (0xc2b8 >> 2),
278           0x00000000,
279           (0x0e00 << 16) | (0xc2bc >> 2),
280           0x00000000,
281           (0x0e00 << 16) | (0xc2c0 >> 2),
282           0x00000000,
283           (0x0e00 << 16) | (0x8228 >> 2),
284           0x00000000,
285           (0x0e00 << 16) | (0x829c >> 2),
286           0x00000000,
287           (0x0e00 << 16) | (0x869c >> 2),
288           0x00000000,
289           (0x0600 << 16) | (0x98f4 >> 2),
290           0x00000000,
291           (0x0e00 << 16) | (0x98f8 >> 2),
292           0x00000000,
293           (0x0e00 << 16) | (0x9900 >> 2),
294           0x00000000,
295           (0x0e00 << 16) | (0xc260 >> 2),
296           0x00000000,
297           (0x0e00 << 16) | (0x90e8 >> 2),
298           0x00000000,
299           (0x0e00 << 16) | (0x3c000 >> 2),
300           0x00000000,
301           (0x0e00 << 16) | (0x3c00c >> 2),
302           0x00000000,
303           (0x0e00 << 16) | (0x8c1c >> 2),
304           0x00000000,
305           (0x0e00 << 16) | (0x9700 >> 2),
306           0x00000000,
307           (0x0e00 << 16) | (0xcd20 >> 2),
308           0x00000000,
309           (0x4e00 << 16) | (0xcd20 >> 2),
310           0x00000000,
311           (0x5e00 << 16) | (0xcd20 >> 2),
312           0x00000000,
313           (0x6e00 << 16) | (0xcd20 >> 2),
314           0x00000000,
315           (0x7e00 << 16) | (0xcd20 >> 2),
316           0x00000000,
317           (0x8e00 << 16) | (0xcd20 >> 2),
318           0x00000000,
319           (0x9e00 << 16) | (0xcd20 >> 2),
320           0x00000000,
321           (0xae00 << 16) | (0xcd20 >> 2),
322           0x00000000,
323           (0xbe00 << 16) | (0xcd20 >> 2),
324           0x00000000,
325           (0x0e00 << 16) | (0x89bc >> 2),
326           0x00000000,
327           (0x0e00 << 16) | (0x8900 >> 2),
328           0x00000000,
329           0x3,
330           (0x0e00 << 16) | (0xc130 >> 2),
331           0x00000000,
332           (0x0e00 << 16) | (0xc134 >> 2),
333           0x00000000,
334           (0x0e00 << 16) | (0xc1fc >> 2),
335           0x00000000,
336           (0x0e00 << 16) | (0xc208 >> 2),
337           0x00000000,
338           (0x0e00 << 16) | (0xc264 >> 2),
339           0x00000000,
340           (0x0e00 << 16) | (0xc268 >> 2),
341           0x00000000,
342           (0x0e00 << 16) | (0xc26c >> 2),
343           0x00000000,
344           (0x0e00 << 16) | (0xc270 >> 2),
345           0x00000000,
346           (0x0e00 << 16) | (0xc274 >> 2),
347           0x00000000,
348           (0x0e00 << 16) | (0xc278 >> 2),
349           0x00000000,
350           (0x0e00 << 16) | (0xc27c >> 2),
351           0x00000000,
352           (0x0e00 << 16) | (0xc280 >> 2),
353           0x00000000,
354           (0x0e00 << 16) | (0xc284 >> 2),
355           0x00000000,
356           (0x0e00 << 16) | (0xc288 >> 2),
357           0x00000000,
358           (0x0e00 << 16) | (0xc28c >> 2),
359           0x00000000,
360           (0x0e00 << 16) | (0xc290 >> 2),
361           0x00000000,
362           (0x0e00 << 16) | (0xc294 >> 2),
363           0x00000000,
364           (0x0e00 << 16) | (0xc298 >> 2),
365           0x00000000,
366           (0x0e00 << 16) | (0xc29c >> 2),
367           0x00000000,
368           (0x0e00 << 16) | (0xc2a0 >> 2),
369           0x00000000,
370           (0x0e00 << 16) | (0xc2a4 >> 2),
371           0x00000000,
372           (0x0e00 << 16) | (0xc2a8 >> 2),
373           0x00000000,
374           (0x0e00 << 16) | (0xc2ac  >> 2),
375           0x00000000,
376           (0x0e00 << 16) | (0xc2b0 >> 2),
377           0x00000000,
378           (0x0e00 << 16) | (0x301d0 >> 2),
379           0x00000000,
380           (0x0e00 << 16) | (0x30238 >> 2),
381           0x00000000,
382           (0x0e00 << 16) | (0x30250 >> 2),
383           0x00000000,
384           (0x0e00 << 16) | (0x30254 >> 2),
385           0x00000000,
386           (0x0e00 << 16) | (0x30258 >> 2),
387           0x00000000,
388           (0x0e00 << 16) | (0x3025c >> 2),
389           0x00000000,
390           (0x4e00 << 16) | (0xc900 >> 2),
391           0x00000000,
392           (0x5e00 << 16) | (0xc900 >> 2),
393           0x00000000,
394           (0x6e00 << 16) | (0xc900 >> 2),
395           0x00000000,
396           (0x7e00 << 16) | (0xc900 >> 2),
397           0x00000000,
398           (0x8e00 << 16) | (0xc900 >> 2),
399           0x00000000,
400           (0x9e00 << 16) | (0xc900 >> 2),
401           0x00000000,
402           (0xae00 << 16) | (0xc900 >> 2),
403           0x00000000,
404           (0xbe00 << 16) | (0xc900 >> 2),
405           0x00000000,
406           (0x4e00 << 16) | (0xc904 >> 2),
407           0x00000000,
408           (0x5e00 << 16) | (0xc904 >> 2),
409           0x00000000,
410           (0x6e00 << 16) | (0xc904 >> 2),
411           0x00000000,
412           (0x7e00 << 16) | (0xc904 >> 2),
413           0x00000000,
414           (0x8e00 << 16) | (0xc904 >> 2),
415           0x00000000,
416           (0x9e00 << 16) | (0xc904 >> 2),
417           0x00000000,
418           (0xae00 << 16) | (0xc904 >> 2),
419           0x00000000,
420           (0xbe00 << 16) | (0xc904 >> 2),
421           0x00000000,
422           (0x4e00 << 16) | (0xc908 >> 2),
423           0x00000000,
424           (0x5e00 << 16) | (0xc908 >> 2),
425           0x00000000,
426           (0x6e00 << 16) | (0xc908 >> 2),
427           0x00000000,
428           (0x7e00 << 16) | (0xc908 >> 2),
429           0x00000000,
430           (0x8e00 << 16) | (0xc908 >> 2),
431           0x00000000,
432           (0x9e00 << 16) | (0xc908 >> 2),
433           0x00000000,
434           (0xae00 << 16) | (0xc908 >> 2),
435           0x00000000,
436           (0xbe00 << 16) | (0xc908 >> 2),
437           0x00000000,
438           (0x4e00 << 16) | (0xc90c >> 2),
439           0x00000000,
440           (0x5e00 << 16) | (0xc90c >> 2),
441           0x00000000,
442           (0x6e00 << 16) | (0xc90c >> 2),
443           0x00000000,
444           (0x7e00 << 16) | (0xc90c >> 2),
445           0x00000000,
446           (0x8e00 << 16) | (0xc90c >> 2),
447           0x00000000,
448           (0x9e00 << 16) | (0xc90c >> 2),
449           0x00000000,
450           (0xae00 << 16) | (0xc90c >> 2),
451           0x00000000,
452           (0xbe00 << 16) | (0xc90c >> 2),
453           0x00000000,
454           (0x4e00 << 16) | (0xc910 >> 2),
455           0x00000000,
456           (0x5e00 << 16) | (0xc910 >> 2),
457           0x00000000,
458           (0x6e00 << 16) | (0xc910 >> 2),
459           0x00000000,
460           (0x7e00 << 16) | (0xc910 >> 2),
461           0x00000000,
462           (0x8e00 << 16) | (0xc910 >> 2),
463           0x00000000,
464           (0x9e00 << 16) | (0xc910 >> 2),
465           0x00000000,
466           (0xae00 << 16) | (0xc910 >> 2),
467           0x00000000,
468           (0xbe00 << 16) | (0xc910 >> 2),
469           0x00000000,
470           (0x0e00 << 16) | (0xc99c >> 2),
471           0x00000000,
472           (0x0e00 << 16) | (0x9834 >> 2),
473           0x00000000,
474           (0x0000 << 16) | (0x30f00 >> 2),
475           0x00000000,
476           (0x0001 << 16) | (0x30f00 >> 2),
477           0x00000000,
478           (0x0000 << 16) | (0x30f04 >> 2),
479           0x00000000,
480           (0x0001 << 16) | (0x30f04 >> 2),
481           0x00000000,
482           (0x0000 << 16) | (0x30f08 >> 2),
483           0x00000000,
484           (0x0001 << 16) | (0x30f08 >> 2),
485           0x00000000,
486           (0x0000 << 16) | (0x30f0c >> 2),
487           0x00000000,
488           (0x0001 << 16) | (0x30f0c >> 2),
489           0x00000000,
490           (0x0600 << 16) | (0x9b7c >> 2),
491           0x00000000,
492           (0x0e00 << 16) | (0x8a14 >> 2),
493           0x00000000,
494           (0x0e00 << 16) | (0x8a18 >> 2),
495           0x00000000,
496           (0x0600 << 16) | (0x30a00 >> 2),
497           0x00000000,
498           (0x0e00 << 16) | (0x8bf0 >> 2),
499           0x00000000,
500           (0x0e00 << 16) | (0x8bcc >> 2),
501           0x00000000,
502           (0x0e00 << 16) | (0x8b24 >> 2),
503           0x00000000,
504           (0x0e00 << 16) | (0x30a04 >> 2),
505           0x00000000,
506           (0x0600 << 16) | (0x30a10 >> 2),
507           0x00000000,
508           (0x0600 << 16) | (0x30a14 >> 2),
509           0x00000000,
510           (0x0600 << 16) | (0x30a18 >> 2),
511           0x00000000,
512           (0x0600 << 16) | (0x30a2c >> 2),
513           0x00000000,
514           (0x0e00 << 16) | (0xc700 >> 2),
515           0x00000000,
516           (0x0e00 << 16) | (0xc704 >> 2),
517           0x00000000,
518           (0x0e00 << 16) | (0xc708 >> 2),
519           0x00000000,
520           (0x0e00 << 16) | (0xc768 >> 2),
521           0x00000000,
522           (0x0400 << 16) | (0xc770 >> 2),
523           0x00000000,
524           (0x0400 << 16) | (0xc774 >> 2),
525           0x00000000,
526           (0x0400 << 16) | (0xc778 >> 2),
527           0x00000000,
528           (0x0400 << 16) | (0xc77c >> 2),
529           0x00000000,
530           (0x0400 << 16) | (0xc780 >> 2),
531           0x00000000,
532           (0x0400 << 16) | (0xc784 >> 2),
533           0x00000000,
534           (0x0400 << 16) | (0xc788 >> 2),
535           0x00000000,
536           (0x0400 << 16) | (0xc78c >> 2),
537           0x00000000,
538           (0x0400 << 16) | (0xc798 >> 2),
539           0x00000000,
540           (0x0400 << 16) | (0xc79c >> 2),
541           0x00000000,
542           (0x0400 << 16) | (0xc7a0 >> 2),
543           0x00000000,
544           (0x0400 << 16) | (0xc7a4 >> 2),
545           0x00000000,
546           (0x0400 << 16) | (0xc7a8 >> 2),
547           0x00000000,
548           (0x0400 << 16) | (0xc7ac >> 2),
549           0x00000000,
550           (0x0400 << 16) | (0xc7b0 >> 2),
551           0x00000000,
552           (0x0400 << 16) | (0xc7b4 >> 2),
553           0x00000000,
554           (0x0e00 << 16) | (0x9100 >> 2),
555           0x00000000,
556           (0x0e00 << 16) | (0x3c010 >> 2),
557           0x00000000,
558           (0x0e00 << 16) | (0x92a8 >> 2),
559           0x00000000,
560           (0x0e00 << 16) | (0x92ac >> 2),
561           0x00000000,
562           (0x0e00 << 16) | (0x92b4 >> 2),
563           0x00000000,
564           (0x0e00 << 16) | (0x92b8 >> 2),
565           0x00000000,
566           (0x0e00 << 16) | (0x92bc >> 2),
567           0x00000000,
568           (0x0e00 << 16) | (0x92c0 >> 2),
569           0x00000000,
570           (0x0e00 << 16) | (0x92c4 >> 2),
571           0x00000000,
572           (0x0e00 << 16) | (0x92c8 >> 2),
573           0x00000000,
574           (0x0e00 << 16) | (0x92cc >> 2),
575           0x00000000,
576           (0x0e00 << 16) | (0x92d0 >> 2),
577           0x00000000,
578           (0x0e00 << 16) | (0x8c00 >> 2),
579           0x00000000,
580           (0x0e00 << 16) | (0x8c04 >> 2),
581           0x00000000,
582           (0x0e00 << 16) | (0x8c20 >> 2),
583           0x00000000,
584           (0x0e00 << 16) | (0x8c38 >> 2),
585           0x00000000,
586           (0x0e00 << 16) | (0x8c3c >> 2),
587           0x00000000,
588           (0x0e00 << 16) | (0xae00 >> 2),
589           0x00000000,
590           (0x0e00 << 16) | (0x9604 >> 2),
591           0x00000000,
592           (0x0e00 << 16) | (0xac08 >> 2),
593           0x00000000,
594           (0x0e00 << 16) | (0xac0c >> 2),
595           0x00000000,
596           (0x0e00 << 16) | (0xac10 >> 2),
597           0x00000000,
598           (0x0e00 << 16) | (0xac14 >> 2),
599           0x00000000,
600           (0x0e00 << 16) | (0xac58 >> 2),
601           0x00000000,
602           (0x0e00 << 16) | (0xac68 >> 2),
603           0x00000000,
604           (0x0e00 << 16) | (0xac6c >> 2),
605           0x00000000,
606           (0x0e00 << 16) | (0xac70 >> 2),
607           0x00000000,
608           (0x0e00 << 16) | (0xac74 >> 2),
609           0x00000000,
610           (0x0e00 << 16) | (0xac78 >> 2),
611           0x00000000,
612           (0x0e00 << 16) | (0xac7c >> 2),
613           0x00000000,
614           (0x0e00 << 16) | (0xac80 >> 2),
615           0x00000000,
616           (0x0e00 << 16) | (0xac84 >> 2),
617           0x00000000,
618           (0x0e00 << 16) | (0xac88 >> 2),
619           0x00000000,
620           (0x0e00 << 16) | (0xac8c >> 2),
621           0x00000000,
622           (0x0e00 << 16) | (0x970c >> 2),
623           0x00000000,
624           (0x0e00 << 16) | (0x9714 >> 2),
625           0x00000000,
626           (0x0e00 << 16) | (0x9718 >> 2),
627           0x00000000,
628           (0x0e00 << 16) | (0x971c >> 2),
629           0x00000000,
630           (0x0e00 << 16) | (0x31068 >> 2),
631           0x00000000,
632           (0x4e00 << 16) | (0x31068 >> 2),
633           0x00000000,
634           (0x5e00 << 16) | (0x31068 >> 2),
635           0x00000000,
636           (0x6e00 << 16) | (0x31068 >> 2),
637           0x00000000,
638           (0x7e00 << 16) | (0x31068 >> 2),
639           0x00000000,
640           (0x8e00 << 16) | (0x31068 >> 2),
641           0x00000000,
642           (0x9e00 << 16) | (0x31068 >> 2),
643           0x00000000,
644           (0xae00 << 16) | (0x31068 >> 2),
645           0x00000000,
646           (0xbe00 << 16) | (0x31068 >> 2),
647           0x00000000,
648           (0x0e00 << 16) | (0xcd10 >> 2),
649           0x00000000,
650           (0x0e00 << 16) | (0xcd14 >> 2),
651           0x00000000,
652           (0x0e00 << 16) | (0x88b0 >> 2),
653           0x00000000,
654           (0x0e00 << 16) | (0x88b4 >> 2),
655           0x00000000,
656           (0x0e00 << 16) | (0x88b8 >> 2),
657           0x00000000,
658           (0x0e00 << 16) | (0x88bc >> 2),
659           0x00000000,
660           (0x0400 << 16) | (0x89c0 >> 2),
661           0x00000000,
662           (0x0e00 << 16) | (0x88c4 >> 2),
663           0x00000000,
664           (0x0e00 << 16) | (0x88c8 >> 2),
665           0x00000000,
666           (0x0e00 << 16) | (0x88d0 >> 2),
667           0x00000000,
668           (0x0e00 << 16) | (0x88d4 >> 2),
669           0x00000000,
670           (0x0e00 << 16) | (0x88d8 >> 2),
671           0x00000000,
672           (0x0e00 << 16) | (0x8980 >> 2),
673           0x00000000,
674           (0x0e00 << 16) | (0x30938 >> 2),
675           0x00000000,
676           (0x0e00 << 16) | (0x3093c >> 2),
677           0x00000000,
678           (0x0e00 << 16) | (0x30940 >> 2),
679           0x00000000,
680           (0x0e00 << 16) | (0x89a0 >> 2),
681           0x00000000,
682           (0x0e00 << 16) | (0x30900 >> 2),
683           0x00000000,
684           (0x0e00 << 16) | (0x30904 >> 2),
685           0x00000000,
686           (0x0e00 << 16) | (0x89b4 >> 2),
687           0x00000000,
688           (0x0e00 << 16) | (0x3c210 >> 2),
689           0x00000000,
690           (0x0e00 << 16) | (0x3c214 >> 2),
691           0x00000000,
692           (0x0e00 << 16) | (0x3c218 >> 2),
693           0x00000000,
694           (0x0e00 << 16) | (0x8904 >> 2),
695           0x00000000,
696           0x5,
697           (0x0e00 << 16) | (0x8c28 >> 2),
698           (0x0e00 << 16) | (0x8c2c >> 2),
699           (0x0e00 << 16) | (0x8c30 >> 2),
700           (0x0e00 << 16) | (0x8c34 >> 2),
701           (0x0e00 << 16) | (0x9600 >> 2),
702 };
703 
704 static const u32 kalindi_rlc_save_restore_register_list[] =
705 {
706           (0x0e00 << 16) | (0xc12c >> 2),
707           0x00000000,
708           (0x0e00 << 16) | (0xc140 >> 2),
709           0x00000000,
710           (0x0e00 << 16) | (0xc150 >> 2),
711           0x00000000,
712           (0x0e00 << 16) | (0xc15c >> 2),
713           0x00000000,
714           (0x0e00 << 16) | (0xc168 >> 2),
715           0x00000000,
716           (0x0e00 << 16) | (0xc170 >> 2),
717           0x00000000,
718           (0x0e00 << 16) | (0xc204 >> 2),
719           0x00000000,
720           (0x0e00 << 16) | (0xc2b4 >> 2),
721           0x00000000,
722           (0x0e00 << 16) | (0xc2b8 >> 2),
723           0x00000000,
724           (0x0e00 << 16) | (0xc2bc >> 2),
725           0x00000000,
726           (0x0e00 << 16) | (0xc2c0 >> 2),
727           0x00000000,
728           (0x0e00 << 16) | (0x8228 >> 2),
729           0x00000000,
730           (0x0e00 << 16) | (0x829c >> 2),
731           0x00000000,
732           (0x0e00 << 16) | (0x869c >> 2),
733           0x00000000,
734           (0x0600 << 16) | (0x98f4 >> 2),
735           0x00000000,
736           (0x0e00 << 16) | (0x98f8 >> 2),
737           0x00000000,
738           (0x0e00 << 16) | (0x9900 >> 2),
739           0x00000000,
740           (0x0e00 << 16) | (0xc260 >> 2),
741           0x00000000,
742           (0x0e00 << 16) | (0x90e8 >> 2),
743           0x00000000,
744           (0x0e00 << 16) | (0x3c000 >> 2),
745           0x00000000,
746           (0x0e00 << 16) | (0x3c00c >> 2),
747           0x00000000,
748           (0x0e00 << 16) | (0x8c1c >> 2),
749           0x00000000,
750           (0x0e00 << 16) | (0x9700 >> 2),
751           0x00000000,
752           (0x0e00 << 16) | (0xcd20 >> 2),
753           0x00000000,
754           (0x4e00 << 16) | (0xcd20 >> 2),
755           0x00000000,
756           (0x5e00 << 16) | (0xcd20 >> 2),
757           0x00000000,
758           (0x6e00 << 16) | (0xcd20 >> 2),
759           0x00000000,
760           (0x7e00 << 16) | (0xcd20 >> 2),
761           0x00000000,
762           (0x0e00 << 16) | (0x89bc >> 2),
763           0x00000000,
764           (0x0e00 << 16) | (0x8900 >> 2),
765           0x00000000,
766           0x3,
767           (0x0e00 << 16) | (0xc130 >> 2),
768           0x00000000,
769           (0x0e00 << 16) | (0xc134 >> 2),
770           0x00000000,
771           (0x0e00 << 16) | (0xc1fc >> 2),
772           0x00000000,
773           (0x0e00 << 16) | (0xc208 >> 2),
774           0x00000000,
775           (0x0e00 << 16) | (0xc264 >> 2),
776           0x00000000,
777           (0x0e00 << 16) | (0xc268 >> 2),
778           0x00000000,
779           (0x0e00 << 16) | (0xc26c >> 2),
780           0x00000000,
781           (0x0e00 << 16) | (0xc270 >> 2),
782           0x00000000,
783           (0x0e00 << 16) | (0xc274 >> 2),
784           0x00000000,
785           (0x0e00 << 16) | (0xc28c >> 2),
786           0x00000000,
787           (0x0e00 << 16) | (0xc290 >> 2),
788           0x00000000,
789           (0x0e00 << 16) | (0xc294 >> 2),
790           0x00000000,
791           (0x0e00 << 16) | (0xc298 >> 2),
792           0x00000000,
793           (0x0e00 << 16) | (0xc2a0 >> 2),
794           0x00000000,
795           (0x0e00 << 16) | (0xc2a4 >> 2),
796           0x00000000,
797           (0x0e00 << 16) | (0xc2a8 >> 2),
798           0x00000000,
799           (0x0e00 << 16) | (0xc2ac >> 2),
800           0x00000000,
801           (0x0e00 << 16) | (0x301d0 >> 2),
802           0x00000000,
803           (0x0e00 << 16) | (0x30238 >> 2),
804           0x00000000,
805           (0x0e00 << 16) | (0x30250 >> 2),
806           0x00000000,
807           (0x0e00 << 16) | (0x30254 >> 2),
808           0x00000000,
809           (0x0e00 << 16) | (0x30258 >> 2),
810           0x00000000,
811           (0x0e00 << 16) | (0x3025c >> 2),
812           0x00000000,
813           (0x4e00 << 16) | (0xc900 >> 2),
814           0x00000000,
815           (0x5e00 << 16) | (0xc900 >> 2),
816           0x00000000,
817           (0x6e00 << 16) | (0xc900 >> 2),
818           0x00000000,
819           (0x7e00 << 16) | (0xc900 >> 2),
820           0x00000000,
821           (0x4e00 << 16) | (0xc904 >> 2),
822           0x00000000,
823           (0x5e00 << 16) | (0xc904 >> 2),
824           0x00000000,
825           (0x6e00 << 16) | (0xc904 >> 2),
826           0x00000000,
827           (0x7e00 << 16) | (0xc904 >> 2),
828           0x00000000,
829           (0x4e00 << 16) | (0xc908 >> 2),
830           0x00000000,
831           (0x5e00 << 16) | (0xc908 >> 2),
832           0x00000000,
833           (0x6e00 << 16) | (0xc908 >> 2),
834           0x00000000,
835           (0x7e00 << 16) | (0xc908 >> 2),
836           0x00000000,
837           (0x4e00 << 16) | (0xc90c >> 2),
838           0x00000000,
839           (0x5e00 << 16) | (0xc90c >> 2),
840           0x00000000,
841           (0x6e00 << 16) | (0xc90c >> 2),
842           0x00000000,
843           (0x7e00 << 16) | (0xc90c >> 2),
844           0x00000000,
845           (0x4e00 << 16) | (0xc910 >> 2),
846           0x00000000,
847           (0x5e00 << 16) | (0xc910 >> 2),
848           0x00000000,
849           (0x6e00 << 16) | (0xc910 >> 2),
850           0x00000000,
851           (0x7e00 << 16) | (0xc910 >> 2),
852           0x00000000,
853           (0x0e00 << 16) | (0xc99c >> 2),
854           0x00000000,
855           (0x0e00 << 16) | (0x9834 >> 2),
856           0x00000000,
857           (0x0000 << 16) | (0x30f00 >> 2),
858           0x00000000,
859           (0x0000 << 16) | (0x30f04 >> 2),
860           0x00000000,
861           (0x0000 << 16) | (0x30f08 >> 2),
862           0x00000000,
863           (0x0000 << 16) | (0x30f0c >> 2),
864           0x00000000,
865           (0x0600 << 16) | (0x9b7c >> 2),
866           0x00000000,
867           (0x0e00 << 16) | (0x8a14 >> 2),
868           0x00000000,
869           (0x0e00 << 16) | (0x8a18 >> 2),
870           0x00000000,
871           (0x0600 << 16) | (0x30a00 >> 2),
872           0x00000000,
873           (0x0e00 << 16) | (0x8bf0 >> 2),
874           0x00000000,
875           (0x0e00 << 16) | (0x8bcc >> 2),
876           0x00000000,
877           (0x0e00 << 16) | (0x8b24 >> 2),
878           0x00000000,
879           (0x0e00 << 16) | (0x30a04 >> 2),
880           0x00000000,
881           (0x0600 << 16) | (0x30a10 >> 2),
882           0x00000000,
883           (0x0600 << 16) | (0x30a14 >> 2),
884           0x00000000,
885           (0x0600 << 16) | (0x30a18 >> 2),
886           0x00000000,
887           (0x0600 << 16) | (0x30a2c >> 2),
888           0x00000000,
889           (0x0e00 << 16) | (0xc700 >> 2),
890           0x00000000,
891           (0x0e00 << 16) | (0xc704 >> 2),
892           0x00000000,
893           (0x0e00 << 16) | (0xc708 >> 2),
894           0x00000000,
895           (0x0e00 << 16) | (0xc768 >> 2),
896           0x00000000,
897           (0x0400 << 16) | (0xc770 >> 2),
898           0x00000000,
899           (0x0400 << 16) | (0xc774 >> 2),
900           0x00000000,
901           (0x0400 << 16) | (0xc798 >> 2),
902           0x00000000,
903           (0x0400 << 16) | (0xc79c >> 2),
904           0x00000000,
905           (0x0e00 << 16) | (0x9100 >> 2),
906           0x00000000,
907           (0x0e00 << 16) | (0x3c010 >> 2),
908           0x00000000,
909           (0x0e00 << 16) | (0x8c00 >> 2),
910           0x00000000,
911           (0x0e00 << 16) | (0x8c04 >> 2),
912           0x00000000,
913           (0x0e00 << 16) | (0x8c20 >> 2),
914           0x00000000,
915           (0x0e00 << 16) | (0x8c38 >> 2),
916           0x00000000,
917           (0x0e00 << 16) | (0x8c3c >> 2),
918           0x00000000,
919           (0x0e00 << 16) | (0xae00 >> 2),
920           0x00000000,
921           (0x0e00 << 16) | (0x9604 >> 2),
922           0x00000000,
923           (0x0e00 << 16) | (0xac08 >> 2),
924           0x00000000,
925           (0x0e00 << 16) | (0xac0c >> 2),
926           0x00000000,
927           (0x0e00 << 16) | (0xac10 >> 2),
928           0x00000000,
929           (0x0e00 << 16) | (0xac14 >> 2),
930           0x00000000,
931           (0x0e00 << 16) | (0xac58 >> 2),
932           0x00000000,
933           (0x0e00 << 16) | (0xac68 >> 2),
934           0x00000000,
935           (0x0e00 << 16) | (0xac6c >> 2),
936           0x00000000,
937           (0x0e00 << 16) | (0xac70 >> 2),
938           0x00000000,
939           (0x0e00 << 16) | (0xac74 >> 2),
940           0x00000000,
941           (0x0e00 << 16) | (0xac78 >> 2),
942           0x00000000,
943           (0x0e00 << 16) | (0xac7c >> 2),
944           0x00000000,
945           (0x0e00 << 16) | (0xac80 >> 2),
946           0x00000000,
947           (0x0e00 << 16) | (0xac84 >> 2),
948           0x00000000,
949           (0x0e00 << 16) | (0xac88 >> 2),
950           0x00000000,
951           (0x0e00 << 16) | (0xac8c >> 2),
952           0x00000000,
953           (0x0e00 << 16) | (0x970c >> 2),
954           0x00000000,
955           (0x0e00 << 16) | (0x9714 >> 2),
956           0x00000000,
957           (0x0e00 << 16) | (0x9718 >> 2),
958           0x00000000,
959           (0x0e00 << 16) | (0x971c >> 2),
960           0x00000000,
961           (0x0e00 << 16) | (0x31068 >> 2),
962           0x00000000,
963           (0x4e00 << 16) | (0x31068 >> 2),
964           0x00000000,
965           (0x5e00 << 16) | (0x31068 >> 2),
966           0x00000000,
967           (0x6e00 << 16) | (0x31068 >> 2),
968           0x00000000,
969           (0x7e00 << 16) | (0x31068 >> 2),
970           0x00000000,
971           (0x0e00 << 16) | (0xcd10 >> 2),
972           0x00000000,
973           (0x0e00 << 16) | (0xcd14 >> 2),
974           0x00000000,
975           (0x0e00 << 16) | (0x88b0 >> 2),
976           0x00000000,
977           (0x0e00 << 16) | (0x88b4 >> 2),
978           0x00000000,
979           (0x0e00 << 16) | (0x88b8 >> 2),
980           0x00000000,
981           (0x0e00 << 16) | (0x88bc >> 2),
982           0x00000000,
983           (0x0400 << 16) | (0x89c0 >> 2),
984           0x00000000,
985           (0x0e00 << 16) | (0x88c4 >> 2),
986           0x00000000,
987           (0x0e00 << 16) | (0x88c8 >> 2),
988           0x00000000,
989           (0x0e00 << 16) | (0x88d0 >> 2),
990           0x00000000,
991           (0x0e00 << 16) | (0x88d4 >> 2),
992           0x00000000,
993           (0x0e00 << 16) | (0x88d8 >> 2),
994           0x00000000,
995           (0x0e00 << 16) | (0x8980 >> 2),
996           0x00000000,
997           (0x0e00 << 16) | (0x30938 >> 2),
998           0x00000000,
999           (0x0e00 << 16) | (0x3093c >> 2),
1000           0x00000000,
1001           (0x0e00 << 16) | (0x30940 >> 2),
1002           0x00000000,
1003           (0x0e00 << 16) | (0x89a0 >> 2),
1004           0x00000000,
1005           (0x0e00 << 16) | (0x30900 >> 2),
1006           0x00000000,
1007           (0x0e00 << 16) | (0x30904 >> 2),
1008           0x00000000,
1009           (0x0e00 << 16) | (0x89b4 >> 2),
1010           0x00000000,
1011           (0x0e00 << 16) | (0x3e1fc >> 2),
1012           0x00000000,
1013           (0x0e00 << 16) | (0x3c210 >> 2),
1014           0x00000000,
1015           (0x0e00 << 16) | (0x3c214 >> 2),
1016           0x00000000,
1017           (0x0e00 << 16) | (0x3c218 >> 2),
1018           0x00000000,
1019           (0x0e00 << 16) | (0x8904 >> 2),
1020           0x00000000,
1021           0x5,
1022           (0x0e00 << 16) | (0x8c28 >> 2),
1023           (0x0e00 << 16) | (0x8c2c >> 2),
1024           (0x0e00 << 16) | (0x8c30 >> 2),
1025           (0x0e00 << 16) | (0x8c34 >> 2),
1026           (0x0e00 << 16) | (0x9600 >> 2),
1027 };
1028 
/*
 * bonaire_golden_spm_registers - one u32 triple programmed on CHIP_BONAIRE.
 *
 * cik_init_golden_registers() hands this table and its ARRAY_SIZE() to
 * radeon_program_register_sequence().
 * NOTE(review): the triple looks like (register offset, mask, value);
 * confirm against radeon_program_register_sequence().
 */
static const u32 bonaire_golden_spm_registers[] = {
	0x30800, 0xe0ffffff, 0xe0000000,
};
1033 
/*
 * bonaire_golden_common_registers - four u32 triples programmed on
 * CHIP_BONAIRE.
 *
 * cik_init_golden_registers() hands this table and its ARRAY_SIZE() to
 * radeon_program_register_sequence().
 * NOTE(review): each row looks like (register offset, mask, value);
 * confirm against radeon_program_register_sequence().
 */
static const u32 bonaire_golden_common_registers[] = {
	0xc770, 0xffffffff, 0x00000800,
	0xc774, 0xffffffff, 0x00000800,
	0xc798, 0xffffffff, 0x00007fbf,
	0xc79c, 0xffffffff, 0x00007faf,
};
1041 
/*
 * bonaire_golden_registers - 41 u32 triples programmed on CHIP_BONAIRE.
 *
 * cik_init_golden_registers() hands this table and its ARRAY_SIZE() to
 * radeon_program_register_sequence(), after bonaire_mgcg_cgcg_init.
 * NOTE(review): each row looks like (register offset, mask, value);
 * confirm against radeon_program_register_sequence().
 */
static const u32 bonaire_golden_registers[] = {
	0x3354, 0x00000333, 0x00000333,
	0x3350, 0x000c0fc0, 0x00040200,
	0x9a10, 0x00010000, 0x00058208,
	0x3c000, 0xffff1fff, 0x00140000,
	0x3c200, 0xfdfc0fff, 0x00000100,
	0x3c234, 0x40000000, 0x40000200,
	0x9830, 0xffffffff, 0x00000000,
	0x9834, 0xf00fffff, 0x00000400,
	0x9838, 0x0002021c, 0x00020200,
	0xc78, 0x00000080, 0x00000000,
	0x5bb0, 0x000000f0, 0x00000070,
	0x5bc0, 0xf0311fff, 0x80300000,
	0x98f8, 0x73773777, 0x12010001,
	0x350c, 0x00810000, 0x408af000,
	0x7030, 0x31000111, 0x00000011,
	0x2f48, 0x73773777, 0x12010001,
	0x220c, 0x00007fb6, 0x0021a1b1,
	0x2210, 0x00007fb6, 0x002021b1,
	0x2180, 0x00007fb6, 0x00002191,
	0x2218, 0x00007fb6, 0x002121b1,
	0x221c, 0x00007fb6, 0x002021b1,
	0x21dc, 0x00007fb6, 0x00002191,
	0x21e0, 0x00007fb6, 0x00002191,
	0x3628, 0x0000003f, 0x0000000a,
	0x362c, 0x0000003f, 0x0000000a,
	0x2ae4, 0x00073ffe, 0x000022a2,
	0x240c, 0x000007ff, 0x00000000,
	0x8a14, 0xf000003f, 0x00000007,
	0x8bf0, 0x00002001, 0x00000001,
	0x8b24, 0xffffffff, 0x00ffffff,
	0x30a04, 0x0000ff0f, 0x00000000,
	0x28a4c, 0x07ffffff, 0x06000000,
	0x4d8, 0x00000fff, 0x00000100,
	0x3e78, 0x00000001, 0x00000002,
	0x9100, 0x03000000, 0x0362c688,
	0x8c00, 0x000000ff, 0x00000001,
	0xe40, 0x00001fff, 0x00001fff,
	0x9060, 0x0000007f, 0x00000020,
	0x9508, 0x00010000, 0x00010000,
	0xac14, 0x000003ff, 0x000000f3,
	0xac0c, 0xffffffff, 0x00001032,
};
1086 
/*
 * bonaire_mgcg_cgcg_init - 82 u32 triples programmed on CHIP_BONAIRE.
 *
 * This is the first table cik_init_golden_registers() hands to
 * radeon_program_register_sequence() for that family.
 * NOTE(review): each row looks like (register offset, mask, value);
 * confirm against radeon_program_register_sequence().
 */
static const u32 bonaire_mgcg_cgcg_init[] = {
	0xc420, 0xffffffff, 0xfffffffc,
	0x30800, 0xffffffff, 0xe0000000,
	0x3c2a0, 0xffffffff, 0x00000100,
	0x3c208, 0xffffffff, 0x00000100,
	0x3c2c0, 0xffffffff, 0xc0000100,
	0x3c2c8, 0xffffffff, 0xc0000100,
	0x3c2c4, 0xffffffff, 0xc0000100,
	0x55e4, 0xffffffff, 0x00600100,
	0x3c280, 0xffffffff, 0x00000100,
	0x3c214, 0xffffffff, 0x06000100,
	0x3c220, 0xffffffff, 0x00000100,
	0x3c218, 0xffffffff, 0x06000100,
	0x3c204, 0xffffffff, 0x00000100,
	0x3c2e0, 0xffffffff, 0x00000100,
	0x3c224, 0xffffffff, 0x00000100,
	0x3c200, 0xffffffff, 0x00000100,
	0x3c230, 0xffffffff, 0x00000100,
	0x3c234, 0xffffffff, 0x00000100,
	0x3c250, 0xffffffff, 0x00000100,
	0x3c254, 0xffffffff, 0x00000100,
	0x3c258, 0xffffffff, 0x00000100,
	0x3c25c, 0xffffffff, 0x00000100,
	0x3c260, 0xffffffff, 0x00000100,
	0x3c27c, 0xffffffff, 0x00000100,
	0x3c278, 0xffffffff, 0x00000100,
	0x3c210, 0xffffffff, 0x06000100,
	0x3c290, 0xffffffff, 0x00000100,
	0x3c274, 0xffffffff, 0x00000100,
	0x3c2b4, 0xffffffff, 0x00000100,
	0x3c2b0, 0xffffffff, 0x00000100,
	0x3c270, 0xffffffff, 0x00000100,
	0x30800, 0xffffffff, 0xe0000000,

	/* 0x3c020..0x3c0a8: the same five values repeated seven times */
	0x3c020, 0xffffffff, 0x00010000,
	0x3c024, 0xffffffff, 0x00030002,
	0x3c028, 0xffffffff, 0x00040007,
	0x3c02c, 0xffffffff, 0x00060005,
	0x3c030, 0xffffffff, 0x00090008,

	0x3c034, 0xffffffff, 0x00010000,
	0x3c038, 0xffffffff, 0x00030002,
	0x3c03c, 0xffffffff, 0x00040007,
	0x3c040, 0xffffffff, 0x00060005,
	0x3c044, 0xffffffff, 0x00090008,

	0x3c048, 0xffffffff, 0x00010000,
	0x3c04c, 0xffffffff, 0x00030002,
	0x3c050, 0xffffffff, 0x00040007,
	0x3c054, 0xffffffff, 0x00060005,
	0x3c058, 0xffffffff, 0x00090008,

	0x3c05c, 0xffffffff, 0x00010000,
	0x3c060, 0xffffffff, 0x00030002,
	0x3c064, 0xffffffff, 0x00040007,
	0x3c068, 0xffffffff, 0x00060005,
	0x3c06c, 0xffffffff, 0x00090008,

	0x3c070, 0xffffffff, 0x00010000,
	0x3c074, 0xffffffff, 0x00030002,
	0x3c078, 0xffffffff, 0x00040007,
	0x3c07c, 0xffffffff, 0x00060005,
	0x3c080, 0xffffffff, 0x00090008,

	0x3c084, 0xffffffff, 0x00010000,
	0x3c088, 0xffffffff, 0x00030002,
	0x3c08c, 0xffffffff, 0x00040007,
	0x3c090, 0xffffffff, 0x00060005,
	0x3c094, 0xffffffff, 0x00090008,

	0x3c098, 0xffffffff, 0x00010000,
	0x3c09c, 0xffffffff, 0x00030002,
	0x3c0a0, 0xffffffff, 0x00040007,
	0x3c0a4, 0xffffffff, 0x00060005,
	0x3c0a8, 0xffffffff, 0x00090008,

	0x3c000, 0xffffffff, 0x96e00200,
	0x8708, 0xffffffff, 0x00900100,
	0xc424, 0xffffffff, 0x0020003f,
	0x38, 0xffffffff, 0x0140001c,
	0x3c, 0x000f0000, 0x000f0000,
	0x220, 0xffffffff, 0xc060000c,
	0x224, 0xc0000fff, 0x00000100,
	0xf90, 0xffffffff, 0x00000100,
	0xf98, 0x00000101, 0x00000000,
	0x20a8, 0xffffffff, 0x00000104,
	0x55e4, 0xff000fff, 0x00000100,
	0x30cc, 0xc0000fff, 0x00000104,
	0xc1e4, 0x00000001, 0x00000001,
	0xd00c, 0xff000ff0, 0x00000100,
	0xd80c, 0xff000ff0, 0x00000100,
};
1172 
/*
 * spectre_golden_spm_registers - one u32 triple.
 *
 * NOTE(review): the consumer is outside this part of the file; presumably
 * a (register offset, mask, value) row for
 * radeon_program_register_sequence() - confirm against the caller.
 */
static const u32 spectre_golden_spm_registers[] = {
	0x30800, 0xe0ffffff, 0xe0000000,
};
1177 
/*
 * spectre_golden_common_registers - four u32 triples.
 *
 * NOTE(review): the consumer is outside this part of the file; presumably
 * (register offset, mask, value) rows for
 * radeon_program_register_sequence() - confirm against the caller.
 */
static const u32 spectre_golden_common_registers[] = {
	0xc770, 0xffffffff, 0x00000800,
	0xc774, 0xffffffff, 0x00000800,
	0xc798, 0xffffffff, 0x00007fbf,
	0xc79c, 0xffffffff, 0x00007faf,
};
1185 
/*
 * spectre_golden_registers - 25 u32 triples.
 *
 * NOTE(review): the consumer is outside this part of the file; presumably
 * (register offset, mask, value) rows for
 * radeon_program_register_sequence() - confirm against the caller.
 */
static const u32 spectre_golden_registers[] = {
	0x3c000, 0xffff1fff, 0x96940200,
	0x3c00c, 0xffff0001, 0xff000000,
	0x3c200, 0xfffc0fff, 0x00000100,
	0x6ed8, 0x00010101, 0x00010000,
	0x9834, 0xf00fffff, 0x00000400,
	0x9838, 0xfffffffc, 0x00020200,
	0x5bb0, 0x000000f0, 0x00000070,
	0x5bc0, 0xf0311fff, 0x80300000,
	0x98f8, 0x73773777, 0x12010001,
	0x9b7c, 0x00ff0000, 0x00fc0000,
	0x2f48, 0x73773777, 0x12010001,
	0x8a14, 0xf000003f, 0x00000007,
	0x8b24, 0xffffffff, 0x00ffffff,
	0x28350, 0x3f3f3fff, 0x00000082,
	0x28354, 0x0000003f, 0x00000000,
	0x3e78, 0x00000001, 0x00000002,
	0x913c, 0xffff03df, 0x00000004,
	0xc768, 0x00000008, 0x00000008,
	0x8c00, 0x000008ff, 0x00000800,
	0x9508, 0x00010000, 0x00010000,
	0xac0c, 0xffffffff, 0x54763210,
	0x214f8, 0x01ff01ff, 0x00000002,
	0x21498, 0x007ff800, 0x00200000,
	0x2015c, 0xffffffff, 0x00000f40,
	0x30934, 0xffffffff, 0x00000001,
};
1214 
/*
 * spectre_mgcg_cgcg_init - 87 u32 triples.
 *
 * NOTE(review): the consumer is outside this part of the file; presumably
 * (register offset, mask, value) rows for
 * radeon_program_register_sequence() - confirm against the caller.
 */
static const u32 spectre_mgcg_cgcg_init[] = {
	0xc420, 0xffffffff, 0xfffffffc,
	0x30800, 0xffffffff, 0xe0000000,
	0x3c2a0, 0xffffffff, 0x00000100,
	0x3c208, 0xffffffff, 0x00000100,
	0x3c2c0, 0xffffffff, 0x00000100,
	0x3c2c8, 0xffffffff, 0x00000100,
	0x3c2c4, 0xffffffff, 0x00000100,
	0x55e4, 0xffffffff, 0x00600100,
	0x3c280, 0xffffffff, 0x00000100,
	0x3c214, 0xffffffff, 0x06000100,
	0x3c220, 0xffffffff, 0x00000100,
	0x3c218, 0xffffffff, 0x06000100,
	0x3c204, 0xffffffff, 0x00000100,
	0x3c2e0, 0xffffffff, 0x00000100,
	0x3c224, 0xffffffff, 0x00000100,
	0x3c200, 0xffffffff, 0x00000100,
	0x3c230, 0xffffffff, 0x00000100,
	0x3c234, 0xffffffff, 0x00000100,
	0x3c250, 0xffffffff, 0x00000100,
	0x3c254, 0xffffffff, 0x00000100,
	0x3c258, 0xffffffff, 0x00000100,
	0x3c25c, 0xffffffff, 0x00000100,
	0x3c260, 0xffffffff, 0x00000100,
	0x3c27c, 0xffffffff, 0x00000100,
	0x3c278, 0xffffffff, 0x00000100,
	0x3c210, 0xffffffff, 0x06000100,
	0x3c290, 0xffffffff, 0x00000100,
	0x3c274, 0xffffffff, 0x00000100,
	0x3c2b4, 0xffffffff, 0x00000100,
	0x3c2b0, 0xffffffff, 0x00000100,
	0x3c270, 0xffffffff, 0x00000100,
	0x30800, 0xffffffff, 0xe0000000,

	/* 0x3c020..0x3c0bc: the same five values repeated eight times */
	0x3c020, 0xffffffff, 0x00010000,
	0x3c024, 0xffffffff, 0x00030002,
	0x3c028, 0xffffffff, 0x00040007,
	0x3c02c, 0xffffffff, 0x00060005,
	0x3c030, 0xffffffff, 0x00090008,

	0x3c034, 0xffffffff, 0x00010000,
	0x3c038, 0xffffffff, 0x00030002,
	0x3c03c, 0xffffffff, 0x00040007,
	0x3c040, 0xffffffff, 0x00060005,
	0x3c044, 0xffffffff, 0x00090008,

	0x3c048, 0xffffffff, 0x00010000,
	0x3c04c, 0xffffffff, 0x00030002,
	0x3c050, 0xffffffff, 0x00040007,
	0x3c054, 0xffffffff, 0x00060005,
	0x3c058, 0xffffffff, 0x00090008,

	0x3c05c, 0xffffffff, 0x00010000,
	0x3c060, 0xffffffff, 0x00030002,
	0x3c064, 0xffffffff, 0x00040007,
	0x3c068, 0xffffffff, 0x00060005,
	0x3c06c, 0xffffffff, 0x00090008,

	0x3c070, 0xffffffff, 0x00010000,
	0x3c074, 0xffffffff, 0x00030002,
	0x3c078, 0xffffffff, 0x00040007,
	0x3c07c, 0xffffffff, 0x00060005,
	0x3c080, 0xffffffff, 0x00090008,

	0x3c084, 0xffffffff, 0x00010000,
	0x3c088, 0xffffffff, 0x00030002,
	0x3c08c, 0xffffffff, 0x00040007,
	0x3c090, 0xffffffff, 0x00060005,
	0x3c094, 0xffffffff, 0x00090008,

	0x3c098, 0xffffffff, 0x00010000,
	0x3c09c, 0xffffffff, 0x00030002,
	0x3c0a0, 0xffffffff, 0x00040007,
	0x3c0a4, 0xffffffff, 0x00060005,
	0x3c0a8, 0xffffffff, 0x00090008,

	0x3c0ac, 0xffffffff, 0x00010000,
	0x3c0b0, 0xffffffff, 0x00030002,
	0x3c0b4, 0xffffffff, 0x00040007,
	0x3c0b8, 0xffffffff, 0x00060005,
	0x3c0bc, 0xffffffff, 0x00090008,

	0x3c000, 0xffffffff, 0x96e00200,
	0x8708, 0xffffffff, 0x00900100,
	0xc424, 0xffffffff, 0x0020003f,
	0x38, 0xffffffff, 0x0140001c,
	0x3c, 0x000f0000, 0x000f0000,
	0x220, 0xffffffff, 0xc060000c,
	0x224, 0xc0000fff, 0x00000100,
	0xf90, 0xffffffff, 0x00000100,
	0xf98, 0x00000101, 0x00000000,
	0x20a8, 0xffffffff, 0x00000104,
	0x55e4, 0xff000fff, 0x00000100,
	0x30cc, 0xc0000fff, 0x00000104,
	0xc1e4, 0x00000001, 0x00000001,
	0xd00c, 0xff000ff0, 0x00000100,
	0xd80c, 0xff000ff0, 0x00000100,
};
1305 
/*
 * kalindi_golden_spm_registers - one u32 triple programmed on CHIP_KABINI.
 *
 * cik_init_golden_registers() hands this table and its ARRAY_SIZE() to
 * radeon_program_register_sequence().
 * NOTE(review): the triple looks like (register offset, mask, value);
 * confirm against radeon_program_register_sequence().
 */
static const u32 kalindi_golden_spm_registers[] = {
	0x30800, 0xe0ffffff, 0xe0000000,
};
1310 
/*
 * kalindi_golden_common_registers - four u32 triples programmed on
 * CHIP_KABINI.
 *
 * cik_init_golden_registers() hands this table and its ARRAY_SIZE() to
 * radeon_program_register_sequence().
 * NOTE(review): each row looks like (register offset, mask, value);
 * confirm against radeon_program_register_sequence().
 */
static const u32 kalindi_golden_common_registers[] = {
	0xc770, 0xffffffff, 0x00000800,
	0xc774, 0xffffffff, 0x00000800,
	0xc798, 0xffffffff, 0x00007fbf,
	0xc79c, 0xffffffff, 0x00007faf,
};
1318 
/*
 * kalindi_golden_registers - 30 u32 triples programmed on CHIP_KABINI.
 *
 * cik_init_golden_registers() hands this table and its ARRAY_SIZE() to
 * radeon_program_register_sequence(), after kalindi_mgcg_cgcg_init.
 * NOTE(review): each row looks like (register offset, mask, value);
 * confirm against radeon_program_register_sequence().
 */
static const u32 kalindi_golden_registers[] = {
	0x3c000, 0xffffdfff, 0x6e944040,
	0x55e4, 0xff607fff, 0xfc000100,
	0x3c220, 0xff000fff, 0x00000100,
	0x3c224, 0xff000fff, 0x00000100,
	0x3c200, 0xfffc0fff, 0x00000100,
	0x6ed8, 0x00010101, 0x00010000,
	0x9830, 0xffffffff, 0x00000000,
	0x9834, 0xf00fffff, 0x00000400,
	0x5bb0, 0x000000f0, 0x00000070,
	0x5bc0, 0xf0311fff, 0x80300000,
	0x98f8, 0x73773777, 0x12010001,
	0x98fc, 0xffffffff, 0x00000010,
	0x9b7c, 0x00ff0000, 0x00fc0000,
	0x8030, 0x00001f0f, 0x0000100a,
	0x2f48, 0x73773777, 0x12010001,
	0x2408, 0x000fffff, 0x000c007f,
	0x8a14, 0xf000003f, 0x00000007,
	0x8b24, 0x3fff3fff, 0x00ffcfff,
	0x30a04, 0x0000ff0f, 0x00000000,
	0x28a4c, 0x07ffffff, 0x06000000,
	0x4d8, 0x00000fff, 0x00000100,
	0x3e78, 0x00000001, 0x00000002,
	0xc768, 0x00000008, 0x00000008,
	0x8c00, 0x000000ff, 0x00000003,
	0x214f8, 0x01ff01ff, 0x00000002,
	0x21498, 0x007ff800, 0x00200000,
	0x2015c, 0xffffffff, 0x00000f40,
	0x88c4, 0x001f3ae3, 0x00000082,
	0x88d4, 0x0000001f, 0x00000010,
	0x30934, 0xffffffff, 0x00000000,
};
1352 
/*
 * kalindi_mgcg_cgcg_init - 55 u32 triples programmed on CHIP_KABINI.
 *
 * This is the first table cik_init_golden_registers() hands to
 * radeon_program_register_sequence() for that family.
 * NOTE(review): each row looks like (register offset, mask, value);
 * confirm against radeon_program_register_sequence().
 */
static const u32 kalindi_mgcg_cgcg_init[] = {
	0xc420, 0xffffffff, 0xfffffffc,
	0x30800, 0xffffffff, 0xe0000000,
	0x3c2a0, 0xffffffff, 0x00000100,
	0x3c208, 0xffffffff, 0x00000100,
	0x3c2c0, 0xffffffff, 0x00000100,
	0x3c2c8, 0xffffffff, 0x00000100,
	0x3c2c4, 0xffffffff, 0x00000100,
	0x55e4, 0xffffffff, 0x00600100,
	0x3c280, 0xffffffff, 0x00000100,
	0x3c214, 0xffffffff, 0x06000100,
	0x3c220, 0xffffffff, 0x00000100,
	0x3c218, 0xffffffff, 0x06000100,
	0x3c204, 0xffffffff, 0x00000100,
	0x3c2e0, 0xffffffff, 0x00000100,
	0x3c224, 0xffffffff, 0x00000100,
	0x3c200, 0xffffffff, 0x00000100,
	0x3c230, 0xffffffff, 0x00000100,
	0x3c234, 0xffffffff, 0x00000100,
	0x3c250, 0xffffffff, 0x00000100,
	0x3c254, 0xffffffff, 0x00000100,
	0x3c258, 0xffffffff, 0x00000100,
	0x3c25c, 0xffffffff, 0x00000100,
	0x3c260, 0xffffffff, 0x00000100,
	0x3c27c, 0xffffffff, 0x00000100,
	0x3c278, 0xffffffff, 0x00000100,
	0x3c210, 0xffffffff, 0x06000100,
	0x3c290, 0xffffffff, 0x00000100,
	0x3c274, 0xffffffff, 0x00000100,
	0x3c2b4, 0xffffffff, 0x00000100,
	0x3c2b0, 0xffffffff, 0x00000100,
	0x3c270, 0xffffffff, 0x00000100,
	0x30800, 0xffffffff, 0xe0000000,

	/* 0x3c020..0x3c044: the same five values repeated twice */
	0x3c020, 0xffffffff, 0x00010000,
	0x3c024, 0xffffffff, 0x00030002,
	0x3c028, 0xffffffff, 0x00040007,
	0x3c02c, 0xffffffff, 0x00060005,
	0x3c030, 0xffffffff, 0x00090008,

	0x3c034, 0xffffffff, 0x00010000,
	0x3c038, 0xffffffff, 0x00030002,
	0x3c03c, 0xffffffff, 0x00040007,
	0x3c040, 0xffffffff, 0x00060005,
	0x3c044, 0xffffffff, 0x00090008,

	0x3c000, 0xffffffff, 0x96e00200,
	0x8708, 0xffffffff, 0x00900100,
	0xc424, 0xffffffff, 0x0020003f,
	0x38, 0xffffffff, 0x0140001c,
	0x3c, 0x000f0000, 0x000f0000,
	0x220, 0xffffffff, 0xc060000c,
	0x224, 0xc0000fff, 0x00000100,
	0x20a8, 0xffffffff, 0x00000104,
	0x55e4, 0xff000fff, 0x00000100,
	0x30cc, 0xc0000fff, 0x00000104,
	0xc1e4, 0x00000001, 0x00000001,
	0xd00c, 0xff000ff0, 0x00000100,
	0xd80c, 0xff000ff0, 0x00000100,
};
1411 
/*
 * hawaii_golden_spm_registers - one u32 triple.
 *
 * NOTE(review): the consumer is outside this part of the file; presumably
 * a (register offset, mask, value) row for
 * radeon_program_register_sequence() - confirm against the caller.
 */
static const u32 hawaii_golden_spm_registers[] = {
	0x30800, 0xe0ffffff, 0xe0000000,
};
1416 
/*
 * hawaii_golden_common_registers - five u32 triples.
 *
 * NOTE(review): the consumer is outside this part of the file; presumably
 * (register offset, mask, value) rows for
 * radeon_program_register_sequence() - confirm against the caller.
 */
static const u32 hawaii_golden_common_registers[] = {
	0x30800, 0xffffffff, 0xe0000000,
	0x28350, 0xffffffff, 0x3a00161a,
	0x28354, 0xffffffff, 0x0000002e,
	0x9a10, 0xffffffff, 0x00018208,
	0x98f8, 0xffffffff, 0x12011003,
};
1425 
/*
 * hawaii_golden_registers - 36 u32 triples.
 *
 * NOTE(review): the consumer is outside this part of the file; presumably
 * (register offset, mask, value) rows for
 * radeon_program_register_sequence() - confirm against the caller.
 */
static const u32 hawaii_golden_registers[] = {
	0x3354, 0x00000333, 0x00000333,
	0x9a10, 0x00010000, 0x00058208,
	0x9830, 0xffffffff, 0x00000000,
	0x9834, 0xf00fffff, 0x00000400,
	0x9838, 0x0002021c, 0x00020200,
	0xc78, 0x00000080, 0x00000000,
	0x5bb0, 0x000000f0, 0x00000070,
	0x5bc0, 0xf0311fff, 0x80300000,
	0x350c, 0x00810000, 0x408af000,
	0x7030, 0x31000111, 0x00000011,
	0x2f48, 0x73773777, 0x12010001,
	0x2120, 0x0000007f, 0x0000001b,
	0x21dc, 0x00007fb6, 0x00002191,
	0x3628, 0x0000003f, 0x0000000a,
	0x362c, 0x0000003f, 0x0000000a,
	0x2ae4, 0x00073ffe, 0x000022a2,
	0x240c, 0x000007ff, 0x00000000,
	0x8bf0, 0x00002001, 0x00000001,
	0x8b24, 0xffffffff, 0x00ffffff,
	0x30a04, 0x0000ff0f, 0x00000000,
	0x28a4c, 0x07ffffff, 0x06000000,
	0x3e78, 0x00000001, 0x00000002,
	0xc768, 0x00000008, 0x00000008,
	0xc770, 0x00000f00, 0x00000800,
	0xc774, 0x00000f00, 0x00000800,
	0xc798, 0x00ffffff, 0x00ff7fbf,
	0xc79c, 0x00ffffff, 0x00ff7faf,
	0x8c00, 0x000000ff, 0x00000800,
	0xe40, 0x00001fff, 0x00001fff,
	0x9060, 0x0000007f, 0x00000020,
	0x9508, 0x00010000, 0x00010000,
	0xae00, 0x00100000, 0x000ff07c,
	0xac14, 0x000003ff, 0x0000000f,
	0xac10, 0xffffffff, 0x7564fdec,
	0xac0c, 0xffffffff, 0x3120b9a8,
	0xac08, 0x20000000, 0x0f9c0000,
};
1465 
/*
 * hawaii_mgcg_cgcg_init - 107 u32 triples.
 *
 * NOTE(review): the consumer is outside this part of the file; presumably
 * (register offset, mask, value) rows for
 * radeon_program_register_sequence() - confirm against the caller.
 */
static const u32 hawaii_mgcg_cgcg_init[] = {
	0xc420, 0xffffffff, 0xfffffffd,
	0x30800, 0xffffffff, 0xe0000000,
	0x3c2a0, 0xffffffff, 0x00000100,
	0x3c208, 0xffffffff, 0x00000100,
	0x3c2c0, 0xffffffff, 0x00000100,
	0x3c2c8, 0xffffffff, 0x00000100,
	0x3c2c4, 0xffffffff, 0x00000100,
	0x55e4, 0xffffffff, 0x00200100,
	0x3c280, 0xffffffff, 0x00000100,
	0x3c214, 0xffffffff, 0x06000100,
	0x3c220, 0xffffffff, 0x00000100,
	0x3c218, 0xffffffff, 0x06000100,
	0x3c204, 0xffffffff, 0x00000100,
	0x3c2e0, 0xffffffff, 0x00000100,
	0x3c224, 0xffffffff, 0x00000100,
	0x3c200, 0xffffffff, 0x00000100,
	0x3c230, 0xffffffff, 0x00000100,
	0x3c234, 0xffffffff, 0x00000100,
	0x3c250, 0xffffffff, 0x00000100,
	0x3c254, 0xffffffff, 0x00000100,
	0x3c258, 0xffffffff, 0x00000100,
	0x3c25c, 0xffffffff, 0x00000100,
	0x3c260, 0xffffffff, 0x00000100,
	0x3c27c, 0xffffffff, 0x00000100,
	0x3c278, 0xffffffff, 0x00000100,
	0x3c210, 0xffffffff, 0x06000100,
	0x3c290, 0xffffffff, 0x00000100,
	0x3c274, 0xffffffff, 0x00000100,
	0x3c2b4, 0xffffffff, 0x00000100,
	0x3c2b0, 0xffffffff, 0x00000100,
	0x3c270, 0xffffffff, 0x00000100,
	0x30800, 0xffffffff, 0xe0000000,

	/* 0x3c020..0x3c0f8: the same five values repeated eleven times */
	0x3c020, 0xffffffff, 0x00010000,
	0x3c024, 0xffffffff, 0x00030002,
	0x3c028, 0xffffffff, 0x00040007,
	0x3c02c, 0xffffffff, 0x00060005,
	0x3c030, 0xffffffff, 0x00090008,

	0x3c034, 0xffffffff, 0x00010000,
	0x3c038, 0xffffffff, 0x00030002,
	0x3c03c, 0xffffffff, 0x00040007,
	0x3c040, 0xffffffff, 0x00060005,
	0x3c044, 0xffffffff, 0x00090008,

	0x3c048, 0xffffffff, 0x00010000,
	0x3c04c, 0xffffffff, 0x00030002,
	0x3c050, 0xffffffff, 0x00040007,
	0x3c054, 0xffffffff, 0x00060005,
	0x3c058, 0xffffffff, 0x00090008,

	0x3c05c, 0xffffffff, 0x00010000,
	0x3c060, 0xffffffff, 0x00030002,
	0x3c064, 0xffffffff, 0x00040007,
	0x3c068, 0xffffffff, 0x00060005,
	0x3c06c, 0xffffffff, 0x00090008,

	0x3c070, 0xffffffff, 0x00010000,
	0x3c074, 0xffffffff, 0x00030002,
	0x3c078, 0xffffffff, 0x00040007,
	0x3c07c, 0xffffffff, 0x00060005,
	0x3c080, 0xffffffff, 0x00090008,

	0x3c084, 0xffffffff, 0x00010000,
	0x3c088, 0xffffffff, 0x00030002,
	0x3c08c, 0xffffffff, 0x00040007,
	0x3c090, 0xffffffff, 0x00060005,
	0x3c094, 0xffffffff, 0x00090008,

	0x3c098, 0xffffffff, 0x00010000,
	0x3c09c, 0xffffffff, 0x00030002,
	0x3c0a0, 0xffffffff, 0x00040007,
	0x3c0a4, 0xffffffff, 0x00060005,
	0x3c0a8, 0xffffffff, 0x00090008,

	0x3c0ac, 0xffffffff, 0x00010000,
	0x3c0b0, 0xffffffff, 0x00030002,
	0x3c0b4, 0xffffffff, 0x00040007,
	0x3c0b8, 0xffffffff, 0x00060005,
	0x3c0bc, 0xffffffff, 0x00090008,

	0x3c0c0, 0xffffffff, 0x00010000,
	0x3c0c4, 0xffffffff, 0x00030002,
	0x3c0c8, 0xffffffff, 0x00040007,
	0x3c0cc, 0xffffffff, 0x00060005,
	0x3c0d0, 0xffffffff, 0x00090008,

	0x3c0d4, 0xffffffff, 0x00010000,
	0x3c0d8, 0xffffffff, 0x00030002,
	0x3c0dc, 0xffffffff, 0x00040007,
	0x3c0e0, 0xffffffff, 0x00060005,
	0x3c0e4, 0xffffffff, 0x00090008,

	0x3c0e8, 0xffffffff, 0x00010000,
	0x3c0ec, 0xffffffff, 0x00030002,
	0x3c0f0, 0xffffffff, 0x00040007,
	0x3c0f4, 0xffffffff, 0x00060005,
	0x3c0f8, 0xffffffff, 0x00090008,

	0xc318, 0xffffffff, 0x00020200,
	0x3350, 0xffffffff, 0x00000200,
	0x15c0, 0xffffffff, 0x00000400,
	0x55e8, 0xffffffff, 0x00000000,
	0x2f50, 0xffffffff, 0x00000902,
	0x3c000, 0xffffffff, 0x96940200,
	0x8708, 0xffffffff, 0x00900100,
	0xc424, 0xffffffff, 0x0020003f,
	0x38, 0xffffffff, 0x0140001c,
	0x3c, 0x000f0000, 0x000f0000,
	0x220, 0xffffffff, 0xc060000c,
	0x224, 0xc0000fff, 0x00000100,
	0xf90, 0xffffffff, 0x00000100,
	0xf98, 0x00000101, 0x00000000,
	0x20a8, 0xffffffff, 0x00000104,
	0x55e4, 0xff000fff, 0x00000100,
	0x30cc, 0xc0000fff, 0x00000104,
	0xc1e4, 0x00000001, 0x00000001,
	0xd00c, 0xff000ff0, 0x00000100,
	0xd80c, 0xff000ff0, 0x00000100,
};
1576 
/*
 * godavari_golden_registers - 32 u32 triples.
 *
 * NOTE(review): the consumer is outside this part of the file; presumably
 * (register offset, mask, value) rows for
 * radeon_program_register_sequence() - confirm against the caller.
 */
static const u32 godavari_golden_registers[] = {
	0x55e4, 0xff607fff, 0xfc000100,
	0x6ed8, 0x00010101, 0x00010000,
	0x9830, 0xffffffff, 0x00000000,
	/*
	 * Was 0x98302, which is not 4-byte aligned.  The parallel row in
	 * kalindi_golden_registers is 0x9834 with this same mask and value,
	 * so 0x98302 was a typo for 0x9834.
	 */
	0x9834, 0xf00fffff, 0x00000400,
	0x6130, 0xffffffff, 0x00010000,
	0x5bb0, 0x000000f0, 0x00000070,
	0x5bc0, 0xf0311fff, 0x80300000,
	0x98f8, 0x73773777, 0x12010001,
	0x98fc, 0xffffffff, 0x00000010,
	0x8030, 0x00001f0f, 0x0000100a,
	0x2f48, 0x73773777, 0x12010001,
	0x2408, 0x000fffff, 0x000c007f,
	0x8a14, 0xf000003f, 0x00000007,
	0x8b24, 0xffffffff, 0x00ff0fff,
	0x30a04, 0x0000ff0f, 0x00000000,
	0x28a4c, 0x07ffffff, 0x06000000,
	0x4d8, 0x00000fff, 0x00000100,
	0xd014, 0x00010000, 0x00810001,
	0xd814, 0x00010000, 0x00810001,
	0x3e78, 0x00000001, 0x00000002,
	0xc768, 0x00000008, 0x00000008,
	0xc770, 0x00000f00, 0x00000800,
	0xc774, 0x00000f00, 0x00000800,
	0xc798, 0x00ffffff, 0x00ff7fbf,
	0xc79c, 0x00ffffff, 0x00ff7faf,
	0x8c00, 0x000000ff, 0x00000001,
	0x214f8, 0x01ff01ff, 0x00000002,
	0x21498, 0x007ff800, 0x00200000,
	0x2015c, 0xffffffff, 0x00000f40,
	0x88c4, 0x001f3ae3, 0x00000082,
	0x88d4, 0x0000001f, 0x00000010,
	0x30934, 0xffffffff, 0x00000000,
};
1612 
1613 
cik_init_golden_registers(struct radeon_device * rdev)1614 static void cik_init_golden_registers(struct radeon_device *rdev)
1615 {
1616           /* Some of the registers might be dependent on GRBM_GFX_INDEX */
1617           mutex_lock(&rdev->grbm_idx_mutex);
1618           switch (rdev->family) {
1619           case CHIP_BONAIRE:
1620                     radeon_program_register_sequence(rdev,
1621                                                              bonaire_mgcg_cgcg_init,
1622                                                              (const u32)ARRAY_SIZE(bonaire_mgcg_cgcg_init));
1623                     radeon_program_register_sequence(rdev,
1624                                                              bonaire_golden_registers,
1625                                                              (const u32)ARRAY_SIZE(bonaire_golden_registers));
1626                     radeon_program_register_sequence(rdev,
1627                                                              bonaire_golden_common_registers,
1628                                                              (const u32)ARRAY_SIZE(bonaire_golden_common_registers));
1629                     radeon_program_register_sequence(rdev,
1630                                                              bonaire_golden_spm_registers,
1631                                                              (const u32)ARRAY_SIZE(bonaire_golden_spm_registers));
1632                     break;
1633           case CHIP_KABINI:
1634                     radeon_program_register_sequence(rdev,
1635                                                              kalindi_mgcg_cgcg_init,
1636                                                              (const u32)ARRAY_SIZE(kalindi_mgcg_cgcg_init));
1637                     radeon_program_register_sequence(rdev,
1638                                                              kalindi_golden_registers,
1639                                                              (const u32)ARRAY_SIZE(kalindi_golden_registers));
1640                     radeon_program_register_sequence(rdev,
1641                                                              kalindi_golden_common_registers,
1642                                                              (const u32)ARRAY_SIZE(kalindi_golden_common_registers));
1643                     radeon_program_register_sequence(rdev,
1644                                                              kalindi_golden_spm_registers,
1645                                                              (const u32)ARRAY_SIZE(kalindi_golden_spm_registers));
1646                     break;
1647           case CHIP_MULLINS:
1648                     radeon_program_register_sequence(rdev,
1649                                                              kalindi_mgcg_cgcg_init,
1650                                                              (const u32)ARRAY_SIZE(kalindi_mgcg_cgcg_init));
1651                     radeon_program_register_sequence(rdev,
1652                                                              godavari_golden_registers,
1653                                                              (const u32)ARRAY_SIZE(godavari_golden_registers));
1654                     radeon_program_register_sequence(rdev,
1655                                                              kalindi_golden_common_registers,
1656                                                              (const u32)ARRAY_SIZE(kalindi_golden_common_registers));
1657                     radeon_program_register_sequence(rdev,
1658                                                              kalindi_golden_spm_registers,
1659                                                              (const u32)ARRAY_SIZE(kalindi_golden_spm_registers));
1660                     break;
1661           case CHIP_KAVERI:
1662                     radeon_program_register_sequence(rdev,
1663                                                              spectre_mgcg_cgcg_init,
1664                                                              (const u32)ARRAY_SIZE(spectre_mgcg_cgcg_init));
1665                     radeon_program_register_sequence(rdev,
1666                                                              spectre_golden_registers,
1667                                                              (const u32)ARRAY_SIZE(spectre_golden_registers));
1668                     radeon_program_register_sequence(rdev,
1669                                                              spectre_golden_common_registers,
1670                                                              (const u32)ARRAY_SIZE(spectre_golden_common_registers));
1671                     radeon_program_register_sequence(rdev,
1672                                                              spectre_golden_spm_registers,
1673                                                              (const u32)ARRAY_SIZE(spectre_golden_spm_registers));
1674                     break;
1675           case CHIP_HAWAII:
1676                     radeon_program_register_sequence(rdev,
1677                                                              hawaii_mgcg_cgcg_init,
1678                                                              (const u32)ARRAY_SIZE(hawaii_mgcg_cgcg_init));
1679                     radeon_program_register_sequence(rdev,
1680                                                              hawaii_golden_registers,
1681                                                              (const u32)ARRAY_SIZE(hawaii_golden_registers));
1682                     radeon_program_register_sequence(rdev,
1683                                                              hawaii_golden_common_registers,
1684                                                              (const u32)ARRAY_SIZE(hawaii_golden_common_registers));
1685                     radeon_program_register_sequence(rdev,
1686                                                              hawaii_golden_spm_registers,
1687                                                              (const u32)ARRAY_SIZE(hawaii_golden_spm_registers));
1688                     break;
1689           default:
1690                     break;
1691           }
1692           mutex_unlock(&rdev->grbm_idx_mutex);
1693 }
1694 
1695 /**
1696  * cik_get_xclk - get the xclk
1697  *
1698  * @rdev: radeon_device pointer
1699  *
1700  * Returns the reference clock used by the gfx engine
1701  * (CIK).
1702  */
cik_get_xclk(struct radeon_device * rdev)1703 u32 cik_get_xclk(struct radeon_device *rdev)
1704 {
1705           u32 reference_clock = rdev->clock.spll.reference_freq;
1706 
1707           if (rdev->flags & RADEON_IS_IGP) {
1708                     if (RREG32_SMC(GENERAL_PWRMGT) & GPU_COUNTER_CLK)
1709                               return reference_clock / 2;
1710           } else {
1711                     if (RREG32_SMC(CG_CLKPIN_CNTL) & XTALIN_DIVIDE)
1712                               return reference_clock / 4;
1713           }
1714           return reference_clock;
1715 }
1716 
1717 /**
1718  * cik_mm_rdoorbell - read a doorbell dword
1719  *
1720  * @rdev: radeon_device pointer
1721  * @index: doorbell index
1722  *
1723  * Returns the value in the doorbell aperture at the
1724  * requested doorbell index (CIK).
1725  */
cik_mm_rdoorbell(struct radeon_device * rdev,u32 index)1726 u32 cik_mm_rdoorbell(struct radeon_device *rdev, u32 index)
1727 {
1728           if (index < rdev->doorbell.num_doorbells) {
1729                     return readl(rdev->doorbell.ptr + index);
1730           } else {
1731                     DRM_ERROR("reading beyond doorbell aperture: 0x%08x!\n", index);
1732                     return 0;
1733           }
1734 }
1735 
1736 /**
1737  * cik_mm_wdoorbell - write a doorbell dword
1738  *
1739  * @rdev: radeon_device pointer
1740  * @index: doorbell index
1741  * @v: value to write
1742  *
1743  * Writes @v to the doorbell aperture at the
1744  * requested doorbell index (CIK).
1745  */
cik_mm_wdoorbell(struct radeon_device * rdev,u32 index,u32 v)1746 void cik_mm_wdoorbell(struct radeon_device *rdev, u32 index, u32 v)
1747 {
1748           if (index < rdev->doorbell.num_doorbells) {
1749                     writel(v, rdev->doorbell.ptr + index);
1750           } else {
1751                     DRM_ERROR("writing beyond doorbell aperture: 0x%08x!\n", index);
1752           }
1753 }
1754 
1755 #define BONAIRE_IO_MC_REGS_SIZE 36
1756 
1757 static const u32 bonaire_io_mc_regs[BONAIRE_IO_MC_REGS_SIZE][2] =
1758 {
1759           {0x00000070, 0x04400000},
1760           {0x00000071, 0x80c01803},
1761           {0x00000072, 0x00004004},
1762           {0x00000073, 0x00000100},
1763           {0x00000074, 0x00ff0000},
1764           {0x00000075, 0x34000000},
1765           {0x00000076, 0x08000014},
1766           {0x00000077, 0x00cc08ec},
1767           {0x00000078, 0x00000400},
1768           {0x00000079, 0x00000000},
1769           {0x0000007a, 0x04090000},
1770           {0x0000007c, 0x00000000},
1771           {0x0000007e, 0x4408a8e8},
1772           {0x0000007f, 0x00000304},
1773           {0x00000080, 0x00000000},
1774           {0x00000082, 0x00000001},
1775           {0x00000083, 0x00000002},
1776           {0x00000084, 0xf3e4f400},
1777           {0x00000085, 0x052024e3},
1778           {0x00000087, 0x00000000},
1779           {0x00000088, 0x01000000},
1780           {0x0000008a, 0x1c0a0000},
1781           {0x0000008b, 0xff010000},
1782           {0x0000008d, 0xffffefff},
1783           {0x0000008e, 0xfff3efff},
1784           {0x0000008f, 0xfff3efbf},
1785           {0x00000092, 0xf7ffffff},
1786           {0x00000093, 0xffffff7f},
1787           {0x00000095, 0x00101101},
1788           {0x00000096, 0x00000fff},
1789           {0x00000097, 0x00116fff},
1790           {0x00000098, 0x60010000},
1791           {0x00000099, 0x10010000},
1792           {0x0000009a, 0x00006000},
1793           {0x0000009b, 0x00001000},
1794           {0x0000009f, 0x00b48000}
1795 };
1796 
1797 #define HAWAII_IO_MC_REGS_SIZE 22
1798 
1799 static const u32 hawaii_io_mc_regs[HAWAII_IO_MC_REGS_SIZE][2] =
1800 {
1801           {0x0000007d, 0x40000000},
1802           {0x0000007e, 0x40180304},
1803           {0x0000007f, 0x0000ff00},
1804           {0x00000081, 0x00000000},
1805           {0x00000083, 0x00000800},
1806           {0x00000086, 0x00000000},
1807           {0x00000087, 0x00000100},
1808           {0x00000088, 0x00020100},
1809           {0x00000089, 0x00000000},
1810           {0x0000008b, 0x00040000},
1811           {0x0000008c, 0x00000100},
1812           {0x0000008e, 0xff010000},
1813           {0x00000090, 0xffffefff},
1814           {0x00000091, 0xfff3efff},
1815           {0x00000092, 0xfff3efbf},
1816           {0x00000093, 0xf7ffffff},
1817           {0x00000094, 0xffffff7f},
1818           {0x00000095, 0x00000fff},
1819           {0x00000096, 0x00116fff},
1820           {0x00000097, 0x60010000},
1821           {0x00000098, 0x10010000},
1822           {0x0000009f, 0x00c79000}
1823 };
1824 
1825 
1826 /**
1827  * cik_srbm_select - select specific register instances
1828  *
1829  * @rdev: radeon_device pointer
1830  * @me: selected ME (micro engine)
1831  * @pipe: pipe
1832  * @queue: queue
1833  * @vmid: VMID
1834  *
1835  * Switches the currently active registers instances.  Some
1836  * registers are instanced per VMID, others are instanced per
1837  * me/pipe/queue combination.
1838  */
cik_srbm_select(struct radeon_device * rdev,u32 me,u32 pipe,u32 queue,u32 vmid)1839 static void cik_srbm_select(struct radeon_device *rdev,
1840                                   u32 me, u32 pipe, u32 queue, u32 vmid)
1841 {
1842           u32 srbm_gfx_cntl = (PIPEID(pipe & 0x3) |
1843                                    MEID(me & 0x3) |
1844                                    VMID(vmid & 0xf) |
1845                                    QUEUEID(queue & 0x7));
1846           WREG32(SRBM_GFX_CNTL, srbm_gfx_cntl);
1847 }
1848 
1849 /* ucode loading */
1850 /**
1851  * ci_mc_load_microcode - load MC ucode into the hw
1852  *
1853  * @rdev: radeon_device pointer
1854  *
1855  * Load the GDDR MC ucode into the hw (CIK).
1856  * Returns 0 on success, error on failure.
1857  */
ci_mc_load_microcode(struct radeon_device * rdev)1858 int ci_mc_load_microcode(struct radeon_device *rdev)
1859 {
1860           const __be32 *fw_data = NULL;
1861           const __le32 *new_fw_data = NULL;
1862           u32 running, tmp;
1863           u32 *io_mc_regs = NULL;
1864           const __le32 *new_io_mc_regs = NULL;
1865           int i, regs_size, ucode_size;
1866 
1867           if (!rdev->mc_fw)
1868                     return -EINVAL;
1869 
1870           if (rdev->new_fw) {
1871                     const struct mc_firmware_header_v1_0 *hdr =
1872                               (const struct mc_firmware_header_v1_0 *)rdev->mc_fw->data;
1873 
1874                     radeon_ucode_print_mc_hdr(&hdr->header);
1875 
1876                     regs_size = le32_to_cpu(hdr->io_debug_size_bytes) / (4 * 2);
1877                     new_io_mc_regs = (const __le32 *)
1878                               (rdev->mc_fw->data + le32_to_cpu(hdr->io_debug_array_offset_bytes));
1879                     ucode_size = le32_to_cpu(hdr->header.ucode_size_bytes) / 4;
1880                     new_fw_data = (const __le32 *)
1881                               (rdev->mc_fw->data + le32_to_cpu(hdr->header.ucode_array_offset_bytes));
1882           } else {
1883                     ucode_size = rdev->mc_fw->datasize / 4;
1884 
1885                     switch (rdev->family) {
1886                     case CHIP_BONAIRE:
1887                               io_mc_regs = (u32 *)&bonaire_io_mc_regs;
1888                               regs_size = BONAIRE_IO_MC_REGS_SIZE;
1889                               break;
1890                     case CHIP_HAWAII:
1891                               io_mc_regs = (u32 *)&hawaii_io_mc_regs;
1892                               regs_size = HAWAII_IO_MC_REGS_SIZE;
1893                               break;
1894                     default:
1895                               return -EINVAL;
1896                     }
1897                     fw_data = (const __be32 *)rdev->mc_fw->data;
1898           }
1899 
1900           running = RREG32(MC_SEQ_SUP_CNTL) & RUN_MASK;
1901 
1902           if (running == 0) {
1903                     /* reset the engine and set to writable */
1904                     WREG32(MC_SEQ_SUP_CNTL, 0x00000008);
1905                     WREG32(MC_SEQ_SUP_CNTL, 0x00000010);
1906 
1907                     /* load mc io regs */
1908                     for (i = 0; i < regs_size; i++) {
1909                               if (rdev->new_fw) {
1910                                         WREG32(MC_SEQ_IO_DEBUG_INDEX, le32_to_cpup(new_io_mc_regs++));
1911                                         WREG32(MC_SEQ_IO_DEBUG_DATA, le32_to_cpup(new_io_mc_regs++));
1912                               } else {
1913                                         WREG32(MC_SEQ_IO_DEBUG_INDEX, io_mc_regs[(i << 1)]);
1914                                         WREG32(MC_SEQ_IO_DEBUG_DATA, io_mc_regs[(i << 1) + 1]);
1915                               }
1916                     }
1917 
1918                     tmp = RREG32(MC_SEQ_MISC0);
1919                     if ((rdev->pdev->device == 0x6649) && ((tmp & 0xff00) == 0x5600)) {
1920                               WREG32(MC_SEQ_IO_DEBUG_INDEX, 5);
1921                               WREG32(MC_SEQ_IO_DEBUG_DATA, 0x00000023);
1922                               WREG32(MC_SEQ_IO_DEBUG_INDEX, 9);
1923                               WREG32(MC_SEQ_IO_DEBUG_DATA, 0x000001f0);
1924                     }
1925 
1926                     /* load the MC ucode */
1927                     for (i = 0; i < ucode_size; i++) {
1928                               if (rdev->new_fw)
1929                                         WREG32(MC_SEQ_SUP_PGM, le32_to_cpup(new_fw_data++));
1930                               else
1931                                         WREG32(MC_SEQ_SUP_PGM, be32_to_cpup(fw_data++));
1932                     }
1933 
1934                     /* put the engine back into the active state */
1935                     WREG32(MC_SEQ_SUP_CNTL, 0x00000008);
1936                     WREG32(MC_SEQ_SUP_CNTL, 0x00000004);
1937                     WREG32(MC_SEQ_SUP_CNTL, 0x00000001);
1938 
1939                     /* wait for training to complete */
1940                     for (i = 0; i < rdev->usec_timeout; i++) {
1941                               if (RREG32(MC_SEQ_TRAIN_WAKEUP_CNTL) & TRAIN_DONE_D0)
1942                                         break;
1943                               udelay(1);
1944                     }
1945                     for (i = 0; i < rdev->usec_timeout; i++) {
1946                               if (RREG32(MC_SEQ_TRAIN_WAKEUP_CNTL) & TRAIN_DONE_D1)
1947                                         break;
1948                               udelay(1);
1949                     }
1950           }
1951 
1952           return 0;
1953 }
1954 
1955 /**
1956  * cik_init_microcode - load ucode images from disk
1957  *
1958  * @rdev: radeon_device pointer
1959  *
1960  * Use the firmware interface to load the ucode images into
1961  * the driver (not loaded into hw).
1962  * Returns 0 on success, error on failure.
1963  */
cik_init_microcode(struct radeon_device * rdev)1964 static int cik_init_microcode(struct radeon_device *rdev)
1965 {
1966           const char *chip_name;
1967           const char *new_chip_name;
1968           size_t pfp_req_size, me_req_size, ce_req_size,
1969                     mec_req_size, rlc_req_size, mc_req_size = 0,
1970                     sdma_req_size, smc_req_size = 0, mc2_req_size = 0;
1971           char fw_name[30];
1972           int new_fw = 0;
1973           int err;
1974           int num_fw;
1975           bool new_smc = false;
1976 
1977           DRM_DEBUG("\n");
1978 
1979           switch (rdev->family) {
1980           case CHIP_BONAIRE:
1981                     chip_name = "BONAIRE";
1982                     if ((rdev->pdev->revision == 0x80) ||
1983                         (rdev->pdev->revision == 0x81) ||
1984                         (rdev->pdev->device == 0x665f))
1985                               new_smc = true;
1986                     new_chip_name = "bonaire";
1987                     pfp_req_size = CIK_PFP_UCODE_SIZE * 4;
1988                     me_req_size = CIK_ME_UCODE_SIZE * 4;
1989                     ce_req_size = CIK_CE_UCODE_SIZE * 4;
1990                     mec_req_size = CIK_MEC_UCODE_SIZE * 4;
1991                     rlc_req_size = BONAIRE_RLC_UCODE_SIZE * 4;
1992                     mc_req_size = BONAIRE_MC_UCODE_SIZE * 4;
1993                     mc2_req_size = BONAIRE_MC2_UCODE_SIZE * 4;
1994                     sdma_req_size = CIK_SDMA_UCODE_SIZE * 4;
1995                     smc_req_size = ALIGN(BONAIRE_SMC_UCODE_SIZE, 4);
1996                     num_fw = 8;
1997                     break;
1998           case CHIP_HAWAII:
1999                     chip_name = "HAWAII";
2000                     if (rdev->pdev->revision == 0x80)
2001                               new_smc = true;
2002                     new_chip_name = "hawaii";
2003                     pfp_req_size = CIK_PFP_UCODE_SIZE * 4;
2004                     me_req_size = CIK_ME_UCODE_SIZE * 4;
2005                     ce_req_size = CIK_CE_UCODE_SIZE * 4;
2006                     mec_req_size = CIK_MEC_UCODE_SIZE * 4;
2007                     rlc_req_size = BONAIRE_RLC_UCODE_SIZE * 4;
2008                     mc_req_size = HAWAII_MC_UCODE_SIZE * 4;
2009                     mc2_req_size = HAWAII_MC2_UCODE_SIZE * 4;
2010                     sdma_req_size = CIK_SDMA_UCODE_SIZE * 4;
2011                     smc_req_size = ALIGN(HAWAII_SMC_UCODE_SIZE, 4);
2012                     num_fw = 8;
2013                     break;
2014           case CHIP_KAVERI:
2015                     chip_name = "KAVERI";
2016                     new_chip_name = "kaveri";
2017                     pfp_req_size = CIK_PFP_UCODE_SIZE * 4;
2018                     me_req_size = CIK_ME_UCODE_SIZE * 4;
2019                     ce_req_size = CIK_CE_UCODE_SIZE * 4;
2020                     mec_req_size = CIK_MEC_UCODE_SIZE * 4;
2021                     rlc_req_size = KV_RLC_UCODE_SIZE * 4;
2022                     sdma_req_size = CIK_SDMA_UCODE_SIZE * 4;
2023                     num_fw = 7;
2024                     break;
2025           case CHIP_KABINI:
2026                     chip_name = "KABINI";
2027                     new_chip_name = "kabini";
2028                     pfp_req_size = CIK_PFP_UCODE_SIZE * 4;
2029                     me_req_size = CIK_ME_UCODE_SIZE * 4;
2030                     ce_req_size = CIK_CE_UCODE_SIZE * 4;
2031                     mec_req_size = CIK_MEC_UCODE_SIZE * 4;
2032                     rlc_req_size = KB_RLC_UCODE_SIZE * 4;
2033                     sdma_req_size = CIK_SDMA_UCODE_SIZE * 4;
2034                     num_fw = 6;
2035                     break;
2036           case CHIP_MULLINS:
2037                     chip_name = "MULLINS";
2038                     new_chip_name = "mullins";
2039                     pfp_req_size = CIK_PFP_UCODE_SIZE * 4;
2040                     me_req_size = CIK_ME_UCODE_SIZE * 4;
2041                     ce_req_size = CIK_CE_UCODE_SIZE * 4;
2042                     mec_req_size = CIK_MEC_UCODE_SIZE * 4;
2043                     rlc_req_size = ML_RLC_UCODE_SIZE * 4;
2044                     sdma_req_size = CIK_SDMA_UCODE_SIZE * 4;
2045                     num_fw = 6;
2046                     break;
2047           default: BUG();
2048           }
2049 
2050           DRM_INFO("Loading %s Microcode\n", new_chip_name);
2051 
2052           ksnprintf(fw_name, sizeof(fw_name), "radeonkmsfw_%s_pfp", new_chip_name);
2053           err = request_firmware(&rdev->pfp_fw, fw_name, rdev->dev);
2054           if (err) {
2055                     ksnprintf(fw_name, sizeof(fw_name), "radeonkmsfw_%s_pfp", chip_name);
2056                     err = request_firmware(&rdev->pfp_fw, fw_name, rdev->dev);
2057                     if (err)
2058                               goto out;
2059                     if (rdev->pfp_fw->datasize != pfp_req_size) {
2060                               pr_err("cik_cp: Bogus length %zu in firmware \"%s\"\n",
2061                                      rdev->pfp_fw->datasize, fw_name);
2062                               err = -EINVAL;
2063                               goto out;
2064                     }
2065           } else {
2066                     err = radeon_ucode_validate(rdev->pfp_fw);
2067                     if (err) {
2068                               pr_err("cik_fw: validation failed for firmware \"%s\"\n",
2069                                      fw_name);
2070                               goto out;
2071                     } else {
2072                               new_fw++;
2073                     }
2074           }
2075 
2076           ksnprintf(fw_name, sizeof(fw_name), "radeonkmsfw_%s_me", new_chip_name);
2077           err = request_firmware(&rdev->me_fw, fw_name, rdev->dev);
2078           if (err) {
2079                     ksnprintf(fw_name, sizeof(fw_name), "radeonkmsfw_%s_me", chip_name);
2080                     err = request_firmware(&rdev->me_fw, fw_name, rdev->dev);
2081                     if (err)
2082                               goto out;
2083                     if (rdev->me_fw->datasize != me_req_size) {
2084                               pr_err("cik_cp: Bogus length %zu in firmware \"%s\"\n",
2085                                      rdev->me_fw->datasize, fw_name);
2086                               err = -EINVAL;
2087                     }
2088           } else {
2089                     err = radeon_ucode_validate(rdev->me_fw);
2090                     if (err) {
2091                               pr_err("cik_fw: validation failed for firmware \"%s\"\n",
2092                                      fw_name);
2093                               goto out;
2094                     } else {
2095                               new_fw++;
2096                     }
2097           }
2098 
2099           ksnprintf(fw_name, sizeof(fw_name), "radeonkmsfw_%s_ce", new_chip_name);
2100           err = request_firmware(&rdev->ce_fw, fw_name, rdev->dev);
2101           if (err) {
2102                     ksnprintf(fw_name, sizeof(fw_name), "radeonkmsfw_%s_ce", chip_name);
2103                     err = request_firmware(&rdev->ce_fw, fw_name, rdev->dev);
2104                     if (err)
2105                               goto out;
2106                     if (rdev->ce_fw->datasize != ce_req_size) {
2107                               pr_err("cik_cp: Bogus length %zu in firmware \"%s\"\n",
2108                                      rdev->ce_fw->datasize, fw_name);
2109                               err = -EINVAL;
2110                     }
2111           } else {
2112                     err = radeon_ucode_validate(rdev->ce_fw);
2113                     if (err) {
2114                               pr_err("cik_fw: validation failed for firmware \"%s\"\n",
2115                                      fw_name);
2116                               goto out;
2117                     } else {
2118                               new_fw++;
2119                     }
2120           }
2121 
2122           ksnprintf(fw_name, sizeof(fw_name), "radeonkmsfw_%s_mec", new_chip_name);
2123           err = request_firmware(&rdev->mec_fw, fw_name, rdev->dev);
2124           if (err) {
2125                     ksnprintf(fw_name, sizeof(fw_name), "radeonkmsfw_%s_mec", chip_name);
2126                     err = request_firmware(&rdev->mec_fw, fw_name, rdev->dev);
2127                     if (err)
2128                               goto out;
2129                     if (rdev->mec_fw->datasize != mec_req_size) {
2130                               pr_err("cik_cp: Bogus length %zu in firmware \"%s\"\n",
2131                                      rdev->mec_fw->datasize, fw_name);
2132                               err = -EINVAL;
2133                     }
2134           } else {
2135                     err = radeon_ucode_validate(rdev->mec_fw);
2136                     if (err) {
2137                               pr_err("cik_fw: validation failed for firmware \"%s\"\n",
2138                                      fw_name);
2139                               goto out;
2140                     } else {
2141                               new_fw++;
2142                     }
2143           }
2144 
2145           if (rdev->family == CHIP_KAVERI) {
2146                     ksnprintf(fw_name, sizeof(fw_name), "radeonkmsfw_%s_mec2", new_chip_name);
2147                     err = request_firmware(&rdev->mec2_fw, fw_name, rdev->dev);
2148                     if (err) {
2149                               goto out;
2150                     } else {
2151                               err = radeon_ucode_validate(rdev->mec2_fw);
2152                               if (err) {
2153                                         goto out;
2154                               } else {
2155                                         new_fw++;
2156                               }
2157                     }
2158           }
2159 
2160           ksnprintf(fw_name, sizeof(fw_name), "radeonkmsfw_%s_rlc", new_chip_name);
2161           err = request_firmware(&rdev->rlc_fw, fw_name, rdev->dev);
2162           if (err) {
2163                     ksnprintf(fw_name, sizeof(fw_name), "radeonkmsfw_%s_rlc", chip_name);
2164                     err = request_firmware(&rdev->rlc_fw, fw_name, rdev->dev);
2165                     if (err)
2166                               goto out;
2167                     if (rdev->rlc_fw->datasize != rlc_req_size) {
2168                               pr_err("cik_rlc: Bogus length %zu in firmware \"%s\"\n",
2169                                      rdev->rlc_fw->datasize, fw_name);
2170                               err = -EINVAL;
2171                     }
2172           } else {
2173                     err = radeon_ucode_validate(rdev->rlc_fw);
2174                     if (err) {
2175                               pr_err("cik_fw: validation failed for firmware \"%s\"\n",
2176                                      fw_name);
2177                               goto out;
2178                     } else {
2179                               new_fw++;
2180                     }
2181           }
2182 
2183           ksnprintf(fw_name, sizeof(fw_name), "radeonkmsfw_%s_sdma", new_chip_name);
2184           err = request_firmware(&rdev->sdma_fw, fw_name, rdev->dev);
2185           if (err) {
2186                     ksnprintf(fw_name, sizeof(fw_name), "radeonkmsfw_%s_sdma", chip_name);
2187                     err = request_firmware(&rdev->sdma_fw, fw_name, rdev->dev);
2188                     if (err)
2189                               goto out;
2190                     if (rdev->sdma_fw->datasize != sdma_req_size) {
2191                               pr_err("cik_sdma: Bogus length %zu in firmware \"%s\"\n",
2192                                      rdev->sdma_fw->datasize, fw_name);
2193                               err = -EINVAL;
2194                     }
2195           } else {
2196                     err = radeon_ucode_validate(rdev->sdma_fw);
2197                     if (err) {
2198                               pr_err("cik_fw: validation failed for firmware \"%s\"\n",
2199                                      fw_name);
2200                               goto out;
2201                     } else {
2202                               new_fw++;
2203                     }
2204           }
2205 
2206           /* No SMC, MC ucode on APUs */
2207           if (!(rdev->flags & RADEON_IS_IGP)) {
2208                     ksnprintf(fw_name, sizeof(fw_name), "radeonkmsfw_%s_mc", new_chip_name);
2209                     err = request_firmware(&rdev->mc_fw, fw_name, rdev->dev);
2210                     if (err) {
2211                               ksnprintf(fw_name, sizeof(fw_name), "radeonkmsfw_%s_mc2", chip_name);
2212                               err = request_firmware(&rdev->mc_fw, fw_name, rdev->dev);
2213                               if (err) {
2214                                         ksnprintf(fw_name, sizeof(fw_name), "radeonkmsfw_%s_mc", chip_name);
2215                                         err = request_firmware(&rdev->mc_fw, fw_name, rdev->dev);
2216                                         if (err)
2217                                                   goto out;
2218                               }
2219                               if ((rdev->mc_fw->datasize != mc_req_size) &&
2220                                   (rdev->mc_fw->datasize != mc2_req_size)){
2221                                         pr_err("cik_mc: Bogus length %zu in firmware \"%s\"\n",
2222                                                rdev->mc_fw->datasize, fw_name);
2223                                         err = -EINVAL;
2224                               }
2225                               DRM_INFO("%s: %zu bytes\n", fw_name, rdev->mc_fw->datasize);
2226                     } else {
2227                               err = radeon_ucode_validate(rdev->mc_fw);
2228                               if (err) {
2229                                         pr_err("cik_fw: validation failed for firmware \"%s\"\n",
2230                                                fw_name);
2231                                         goto out;
2232                               } else {
2233                                         new_fw++;
2234                               }
2235                     }
2236 
2237                     if (new_smc)
2238                               ksnprintf(fw_name, sizeof(fw_name), "amdgpufw_%s_k_smc", new_chip_name);
2239                     else
2240                               ksnprintf(fw_name, sizeof(fw_name), "radeonkmsfw_%s_smc", new_chip_name);
2241                     err = request_firmware(&rdev->smc_fw, fw_name, rdev->dev);
2242                     if (err) {
2243                               ksnprintf(fw_name, sizeof(fw_name), "radeonkmsfw_%s_smc", chip_name);
2244                               err = request_firmware(&rdev->smc_fw, fw_name, rdev->dev);
2245                               if (err) {
2246                                         pr_err("smc: error loading firmware \"%s\"\n",
2247                                                fw_name);
2248                                         release_firmware(rdev->smc_fw);
2249                                         rdev->smc_fw = NULL;
2250                                         err = 0;
2251                               } else if (rdev->smc_fw->datasize != smc_req_size) {
2252                                         pr_err("cik_smc: Bogus length %zu in firmware \"%s\"\n",
2253                                                rdev->smc_fw->datasize, fw_name);
2254                                         err = -EINVAL;
2255                               }
2256                     } else {
2257                               err = radeon_ucode_validate(rdev->smc_fw);
2258                               if (err) {
2259                                         pr_err("cik_fw: validation failed for firmware \"%s\"\n",
2260                                                fw_name);
2261                                         goto out;
2262                               } else {
2263                                         new_fw++;
2264                               }
2265                     }
2266           }
2267 
2268           if (new_fw == 0) {
2269                     rdev->new_fw = false;
2270           } else if (new_fw < num_fw) {
2271                     pr_err("ci_fw: mixing new and old firmware!\n");
2272                     err = -EINVAL;
2273           } else {
2274                     rdev->new_fw = true;
2275           }
2276 
2277 out:
2278           if (err) {
2279                     if (err != -EINVAL)
2280                               pr_err("cik_cp: Failed to load firmware \"%s\"\n",
2281                                      fw_name);
2282                     release_firmware(rdev->pfp_fw);
2283                     rdev->pfp_fw = NULL;
2284                     release_firmware(rdev->me_fw);
2285                     rdev->me_fw = NULL;
2286                     release_firmware(rdev->ce_fw);
2287                     rdev->ce_fw = NULL;
2288                     release_firmware(rdev->mec_fw);
2289                     rdev->mec_fw = NULL;
2290                     release_firmware(rdev->mec2_fw);
2291                     rdev->mec2_fw = NULL;
2292                     release_firmware(rdev->rlc_fw);
2293                     rdev->rlc_fw = NULL;
2294                     release_firmware(rdev->sdma_fw);
2295                     rdev->sdma_fw = NULL;
2296                     release_firmware(rdev->mc_fw);
2297                     rdev->mc_fw = NULL;
2298                     release_firmware(rdev->smc_fw);
2299                     rdev->smc_fw = NULL;
2300           }
2301           return err;
2302 }
2303 
2304 /**
2305  * cik_fini_microcode - drop the firmwares image references
2306  *
2307  * @rdev: radeon_device pointer
2308  *
2309  * Drop the pfp, me, mec, mec2, rlc, sdma, mc, smc and ce firmware image references.
2310  * Called at driver shutdown.
2311  */
cik_fini_microcode(struct radeon_device * rdev)2312 static void cik_fini_microcode(struct radeon_device *rdev)
2313 {
2314           release_firmware(rdev->pfp_fw);
2315           rdev->pfp_fw = NULL;
2316           release_firmware(rdev->me_fw);
2317           rdev->me_fw = NULL;
2318           release_firmware(rdev->ce_fw);
2319           rdev->ce_fw = NULL;
2320           release_firmware(rdev->mec_fw);
2321           rdev->mec_fw = NULL;
2322           release_firmware(rdev->mec2_fw);
2323           rdev->mec2_fw = NULL;
2324           release_firmware(rdev->rlc_fw);
2325           rdev->rlc_fw = NULL;
2326           release_firmware(rdev->sdma_fw);
2327           rdev->sdma_fw = NULL;
2328           release_firmware(rdev->mc_fw);
2329           rdev->mc_fw = NULL;
2330           release_firmware(rdev->smc_fw);
2331           rdev->smc_fw = NULL;
2332 }
2333 
2334 /*
2335  * Core functions
2336  */
2337 /**
2338  * cik_tiling_mode_table_init - init the hw tiling table
2339  *
2340  * @rdev: radeon_device pointer
2341  *
2342  * Starting with SI, the tiling setup is done globally in a
2343  * set of 32 tiling modes.  Rather than selecting each set of
2344  * parameters per surface as on older asics, we just select
2345  * which index in the tiling table we want to use, and the
2346  * surface uses those parameters (CIK).
2347  */
cik_tiling_mode_table_init(struct radeon_device * rdev)2348 static void cik_tiling_mode_table_init(struct radeon_device *rdev)
2349 {
2350           u32 *tile = rdev->config.cik.tile_mode_array;
2351           u32 *macrotile = rdev->config.cik.macrotile_mode_array;
2352           const u32 num_tile_mode_states =
2353                               ARRAY_SIZE(rdev->config.cik.tile_mode_array);
2354           const u32 num_secondary_tile_mode_states =
2355                               ARRAY_SIZE(rdev->config.cik.macrotile_mode_array);
2356           u32 reg_offset, split_equal_to_row_size;
2357           u32 num_pipe_configs;
2358           u32 num_rbs = rdev->config.cik.max_backends_per_se *
2359                     rdev->config.cik.max_shader_engines;
2360 
2361           switch (rdev->config.cik.mem_row_size_in_kb) {
2362           case 1:
2363                     split_equal_to_row_size = ADDR_SURF_TILE_SPLIT_1KB;
2364                     break;
2365           case 2:
2366           default:
2367                     split_equal_to_row_size = ADDR_SURF_TILE_SPLIT_2KB;
2368                     break;
2369           case 4:
2370                     split_equal_to_row_size = ADDR_SURF_TILE_SPLIT_4KB;
2371                     break;
2372           }
2373 
2374           num_pipe_configs = rdev->config.cik.max_tile_pipes;
2375           if (num_pipe_configs > 8)
2376                     num_pipe_configs = 16;
2377 
2378           for (reg_offset = 0; reg_offset < num_tile_mode_states; reg_offset++)
2379                     tile[reg_offset] = 0;
2380           for (reg_offset = 0; reg_offset < num_secondary_tile_mode_states; reg_offset++)
2381                     macrotile[reg_offset] = 0;
2382 
2383           switch(num_pipe_configs) {
2384           case 16:
2385                     tile[0] = (ARRAY_MODE(ARRAY_2D_TILED_THIN1) |
2386                                  MICRO_TILE_MODE_NEW(ADDR_SURF_DEPTH_MICRO_TILING) |
2387                                  PIPE_CONFIG(ADDR_SURF_P16_32x32_16x16) |
2388                                  TILE_SPLIT(ADDR_SURF_TILE_SPLIT_64B));
2389                     tile[1] = (ARRAY_MODE(ARRAY_2D_TILED_THIN1) |
2390                                  MICRO_TILE_MODE_NEW(ADDR_SURF_DEPTH_MICRO_TILING) |
2391                                  PIPE_CONFIG(ADDR_SURF_P16_32x32_16x16) |
2392                                  TILE_SPLIT(ADDR_SURF_TILE_SPLIT_128B));
2393                     tile[2] = (ARRAY_MODE(ARRAY_2D_TILED_THIN1) |
2394                                  MICRO_TILE_MODE_NEW(ADDR_SURF_DEPTH_MICRO_TILING) |
2395                                  PIPE_CONFIG(ADDR_SURF_P16_32x32_16x16) |
2396                                  TILE_SPLIT(ADDR_SURF_TILE_SPLIT_256B));
2397                     tile[3] = (ARRAY_MODE(ARRAY_2D_TILED_THIN1) |
2398                                  MICRO_TILE_MODE_NEW(ADDR_SURF_DEPTH_MICRO_TILING) |
2399                                  PIPE_CONFIG(ADDR_SURF_P16_32x32_16x16) |
2400                                  TILE_SPLIT(ADDR_SURF_TILE_SPLIT_512B));
2401                     tile[4] = (ARRAY_MODE(ARRAY_2D_TILED_THIN1) |
2402                                  MICRO_TILE_MODE_NEW(ADDR_SURF_DEPTH_MICRO_TILING) |
2403                                  PIPE_CONFIG(ADDR_SURF_P16_32x32_16x16) |
2404                                  TILE_SPLIT(split_equal_to_row_size));
2405                     tile[5] = (ARRAY_MODE(ARRAY_1D_TILED_THIN1) |
2406                                  PIPE_CONFIG(ADDR_SURF_P16_32x32_16x16) |
2407                                  MICRO_TILE_MODE_NEW(ADDR_SURF_DEPTH_MICRO_TILING));
2408                     tile[6] = (ARRAY_MODE(ARRAY_PRT_2D_TILED_THIN1) |
2409                                  MICRO_TILE_MODE_NEW(ADDR_SURF_DEPTH_MICRO_TILING) |
2410                                  PIPE_CONFIG(ADDR_SURF_P16_32x32_16x16) |
2411                                  TILE_SPLIT(ADDR_SURF_TILE_SPLIT_256B));
2412                     tile[7] = (ARRAY_MODE(ARRAY_PRT_2D_TILED_THIN1) |
2413                                  MICRO_TILE_MODE_NEW(ADDR_SURF_DEPTH_MICRO_TILING) |
2414                                  PIPE_CONFIG(ADDR_SURF_P16_32x32_16x16) |
2415                                  TILE_SPLIT(split_equal_to_row_size));
2416                     tile[8] = (ARRAY_MODE(ARRAY_LINEAR_ALIGNED) |
2417                                  PIPE_CONFIG(ADDR_SURF_P16_32x32_16x16));
2418                     tile[9] = (ARRAY_MODE(ARRAY_1D_TILED_THIN1) |
2419                                  PIPE_CONFIG(ADDR_SURF_P16_32x32_16x16) |
2420                                  MICRO_TILE_MODE_NEW(ADDR_SURF_DISPLAY_MICRO_TILING));
2421                     tile[10] = (ARRAY_MODE(ARRAY_2D_TILED_THIN1) |
2422                                   MICRO_TILE_MODE_NEW(ADDR_SURF_DISPLAY_MICRO_TILING) |
2423                                   PIPE_CONFIG(ADDR_SURF_P16_32x32_16x16) |
2424                                   SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_2));
2425                     tile[11] = (ARRAY_MODE(ARRAY_PRT_TILED_THIN1) |
2426                                   MICRO_TILE_MODE_NEW(ADDR_SURF_DISPLAY_MICRO_TILING) |
2427                                   PIPE_CONFIG(ADDR_SURF_P16_32x32_8x16) |
2428                                   SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_2));
2429                     tile[12] = (ARRAY_MODE(ARRAY_PRT_2D_TILED_THIN1) |
2430                                   MICRO_TILE_MODE_NEW(ADDR_SURF_DISPLAY_MICRO_TILING) |
2431                                   PIPE_CONFIG(ADDR_SURF_P16_32x32_16x16) |
2432                                   SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_2));
2433                     tile[13] = (ARRAY_MODE(ARRAY_1D_TILED_THIN1) |
2434                                   PIPE_CONFIG(ADDR_SURF_P16_32x32_16x16) |
2435                                   MICRO_TILE_MODE_NEW(ADDR_SURF_THIN_MICRO_TILING));
2436                     tile[14] = (ARRAY_MODE(ARRAY_2D_TILED_THIN1) |
2437                                   MICRO_TILE_MODE_NEW(ADDR_SURF_THIN_MICRO_TILING) |
2438                                   PIPE_CONFIG(ADDR_SURF_P16_32x32_16x16) |
2439                                   SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_2));
2440                     tile[16] = (ARRAY_MODE(ARRAY_PRT_TILED_THIN1) |
2441                                   MICRO_TILE_MODE_NEW(ADDR_SURF_THIN_MICRO_TILING) |
2442                                   PIPE_CONFIG(ADDR_SURF_P16_32x32_8x16) |
2443                                   SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_2));
2444                     tile[17] = (ARRAY_MODE(ARRAY_PRT_2D_TILED_THIN1) |
2445                                   MICRO_TILE_MODE_NEW(ADDR_SURF_THIN_MICRO_TILING) |
2446                                   PIPE_CONFIG(ADDR_SURF_P16_32x32_16x16) |
2447                                   SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_2));
2448                     tile[27] = (ARRAY_MODE(ARRAY_1D_TILED_THIN1) |
2449                                   PIPE_CONFIG(ADDR_SURF_P16_32x32_16x16) |
2450                                   MICRO_TILE_MODE_NEW(ADDR_SURF_ROTATED_MICRO_TILING));
2451                     tile[28] = (ARRAY_MODE(ARRAY_2D_TILED_THIN1) |
2452                                   MICRO_TILE_MODE_NEW(ADDR_SURF_ROTATED_MICRO_TILING) |
2453                                   PIPE_CONFIG(ADDR_SURF_P16_32x32_16x16) |
2454                                   SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_2));
2455                     tile[29] = (ARRAY_MODE(ARRAY_PRT_TILED_THIN1) |
2456                                   MICRO_TILE_MODE_NEW(ADDR_SURF_ROTATED_MICRO_TILING) |
2457                                   PIPE_CONFIG(ADDR_SURF_P16_32x32_8x16) |
2458                                   SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_2));
2459                     tile[30] = (ARRAY_MODE(ARRAY_PRT_2D_TILED_THIN1) |
2460                                   MICRO_TILE_MODE_NEW(ADDR_SURF_ROTATED_MICRO_TILING) |
2461                                   PIPE_CONFIG(ADDR_SURF_P16_32x32_16x16) |
2462                                   SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_2));
2463 
2464                     macrotile[0] = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
2465                                  BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_4) |
2466                                  MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_2) |
2467                                  NUM_BANKS(ADDR_SURF_16_BANK));
2468                     macrotile[1] = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
2469                                  BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_2) |
2470                                  MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_2) |
2471                                  NUM_BANKS(ADDR_SURF_16_BANK));
2472                     macrotile[2] = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
2473                                  BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_1) |
2474                                  MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_1) |
2475                                  NUM_BANKS(ADDR_SURF_16_BANK));
2476                     macrotile[3] = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
2477                                  BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_1) |
2478                                  MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_1) |
2479                                  NUM_BANKS(ADDR_SURF_16_BANK));
2480                     macrotile[4] = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
2481                                  BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_1) |
2482                                  MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_1) |
2483                                  NUM_BANKS(ADDR_SURF_8_BANK));
2484                     macrotile[5] = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
2485                                  BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_1) |
2486                                  MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_1) |
2487                                  NUM_BANKS(ADDR_SURF_4_BANK));
2488                     macrotile[6] = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
2489                                  BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_1) |
2490                                  MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_1) |
2491                                  NUM_BANKS(ADDR_SURF_2_BANK));
2492                     macrotile[8] = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
2493                                  BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_4) |
2494                                  MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_2) |
2495                                  NUM_BANKS(ADDR_SURF_16_BANK));
2496                     macrotile[9] = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
2497                                  BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_2) |
2498                                  MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_2) |
2499                                  NUM_BANKS(ADDR_SURF_16_BANK));
2500                     macrotile[10] = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
2501                                   BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_1) |
2502                                   MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_1) |
2503                                   NUM_BANKS(ADDR_SURF_16_BANK));
2504                     macrotile[11] = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
2505                                   BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_1) |
2506                                   MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_1) |
2507                                   NUM_BANKS(ADDR_SURF_8_BANK));
2508                     macrotile[12] = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
2509                                   BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_1) |
2510                                   MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_1) |
2511                                   NUM_BANKS(ADDR_SURF_4_BANK));
2512                     macrotile[13] = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
2513                                   BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_1) |
2514                                   MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_1) |
2515                                   NUM_BANKS(ADDR_SURF_2_BANK));
2516                     macrotile[14] = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
2517                                   BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_1) |
2518                                   MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_1) |
2519                                   NUM_BANKS(ADDR_SURF_2_BANK));
2520 
2521                     for (reg_offset = 0; reg_offset < num_tile_mode_states; reg_offset++)
2522                               WREG32(GB_TILE_MODE0 + (reg_offset * 4), tile[reg_offset]);
2523                     for (reg_offset = 0; reg_offset < num_secondary_tile_mode_states; reg_offset++)
2524                               WREG32(GB_MACROTILE_MODE0 + (reg_offset * 4), macrotile[reg_offset]);
2525                     break;
2526 
2527           case 8:
2528                     tile[0] = (ARRAY_MODE(ARRAY_2D_TILED_THIN1) |
2529                                  MICRO_TILE_MODE_NEW(ADDR_SURF_DEPTH_MICRO_TILING) |
2530                                  PIPE_CONFIG(ADDR_SURF_P8_32x32_16x16) |
2531                                  TILE_SPLIT(ADDR_SURF_TILE_SPLIT_64B));
2532                     tile[1] = (ARRAY_MODE(ARRAY_2D_TILED_THIN1) |
2533                                  MICRO_TILE_MODE_NEW(ADDR_SURF_DEPTH_MICRO_TILING) |
2534                                  PIPE_CONFIG(ADDR_SURF_P8_32x32_16x16) |
2535                                  TILE_SPLIT(ADDR_SURF_TILE_SPLIT_128B));
2536                     tile[2] = (ARRAY_MODE(ARRAY_2D_TILED_THIN1) |
2537                                  MICRO_TILE_MODE_NEW(ADDR_SURF_DEPTH_MICRO_TILING) |
2538                                  PIPE_CONFIG(ADDR_SURF_P8_32x32_16x16) |
2539                                  TILE_SPLIT(ADDR_SURF_TILE_SPLIT_256B));
2540                     tile[3] = (ARRAY_MODE(ARRAY_2D_TILED_THIN1) |
2541                                  MICRO_TILE_MODE_NEW(ADDR_SURF_DEPTH_MICRO_TILING) |
2542                                  PIPE_CONFIG(ADDR_SURF_P8_32x32_16x16) |
2543                                  TILE_SPLIT(ADDR_SURF_TILE_SPLIT_512B));
2544                     tile[4] = (ARRAY_MODE(ARRAY_2D_TILED_THIN1) |
2545                                  MICRO_TILE_MODE_NEW(ADDR_SURF_DEPTH_MICRO_TILING) |
2546                                  PIPE_CONFIG(ADDR_SURF_P8_32x32_16x16) |
2547                                  TILE_SPLIT(split_equal_to_row_size));
2548                     tile[5] = (ARRAY_MODE(ARRAY_1D_TILED_THIN1) |
2549                                  PIPE_CONFIG(ADDR_SURF_P8_32x32_16x16) |
2550                                  MICRO_TILE_MODE_NEW(ADDR_SURF_DEPTH_MICRO_TILING));
2551                     tile[6] = (ARRAY_MODE(ARRAY_PRT_2D_TILED_THIN1) |
2552                                  MICRO_TILE_MODE_NEW(ADDR_SURF_DEPTH_MICRO_TILING) |
2553                                  PIPE_CONFIG(ADDR_SURF_P8_32x32_16x16) |
2554                                  TILE_SPLIT(ADDR_SURF_TILE_SPLIT_256B));
2555                     tile[7] = (ARRAY_MODE(ARRAY_PRT_2D_TILED_THIN1) |
2556                                  MICRO_TILE_MODE_NEW(ADDR_SURF_DEPTH_MICRO_TILING) |
2557                                  PIPE_CONFIG(ADDR_SURF_P8_32x32_16x16) |
2558                                  TILE_SPLIT(split_equal_to_row_size));
2559                     tile[8] = (ARRAY_MODE(ARRAY_LINEAR_ALIGNED) |
2560                                  PIPE_CONFIG(ADDR_SURF_P8_32x32_16x16));
2561                     tile[9] = (ARRAY_MODE(ARRAY_1D_TILED_THIN1) |
2562                                  PIPE_CONFIG(ADDR_SURF_P8_32x32_16x16) |
2563                                  MICRO_TILE_MODE_NEW(ADDR_SURF_DISPLAY_MICRO_TILING));
2564                     tile[10] = (ARRAY_MODE(ARRAY_2D_TILED_THIN1) |
2565                                   MICRO_TILE_MODE_NEW(ADDR_SURF_DISPLAY_MICRO_TILING) |
2566                                   PIPE_CONFIG(ADDR_SURF_P8_32x32_16x16) |
2567                                   SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_2));
2568                     tile[11] = (ARRAY_MODE(ARRAY_PRT_TILED_THIN1) |
2569                                   MICRO_TILE_MODE_NEW(ADDR_SURF_DISPLAY_MICRO_TILING) |
2570                                   PIPE_CONFIG(ADDR_SURF_P8_32x32_8x16) |
2571                                   SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_2));
2572                     tile[12] = (ARRAY_MODE(ARRAY_PRT_2D_TILED_THIN1) |
2573                                   MICRO_TILE_MODE_NEW(ADDR_SURF_DISPLAY_MICRO_TILING) |
2574                                   PIPE_CONFIG(ADDR_SURF_P8_32x32_16x16) |
2575                                   SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_2));
2576                     tile[13] = (ARRAY_MODE(ARRAY_1D_TILED_THIN1) |
2577                                   PIPE_CONFIG(ADDR_SURF_P8_32x32_16x16) |
2578                                   MICRO_TILE_MODE_NEW(ADDR_SURF_THIN_MICRO_TILING));
2579                     tile[14] = (ARRAY_MODE(ARRAY_2D_TILED_THIN1) |
2580                                   MICRO_TILE_MODE_NEW(ADDR_SURF_THIN_MICRO_TILING) |
2581                                   PIPE_CONFIG(ADDR_SURF_P8_32x32_16x16) |
2582                                   SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_2));
2583                     tile[16] = (ARRAY_MODE(ARRAY_PRT_TILED_THIN1) |
2584                                   MICRO_TILE_MODE_NEW(ADDR_SURF_THIN_MICRO_TILING) |
2585                                   PIPE_CONFIG(ADDR_SURF_P8_32x32_8x16) |
2586                                   SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_2));
2587                     tile[17] = (ARRAY_MODE(ARRAY_PRT_2D_TILED_THIN1) |
2588                                   MICRO_TILE_MODE_NEW(ADDR_SURF_THIN_MICRO_TILING) |
2589                                   PIPE_CONFIG(ADDR_SURF_P8_32x32_16x16) |
2590                                   SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_2));
2591                     tile[27] = (ARRAY_MODE(ARRAY_1D_TILED_THIN1) |
2592                                   PIPE_CONFIG(ADDR_SURF_P8_32x32_16x16) |
2593                                   MICRO_TILE_MODE_NEW(ADDR_SURF_ROTATED_MICRO_TILING));
2594                     tile[28] = (ARRAY_MODE(ARRAY_2D_TILED_THIN1) |
2595                                   MICRO_TILE_MODE_NEW(ADDR_SURF_ROTATED_MICRO_TILING) |
2596                                   PIPE_CONFIG(ADDR_SURF_P8_32x32_16x16) |
2597                                   SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_2));
2598                     tile[29] = (ARRAY_MODE(ARRAY_PRT_TILED_THIN1) |
2599                                   MICRO_TILE_MODE_NEW(ADDR_SURF_ROTATED_MICRO_TILING) |
2600                                   PIPE_CONFIG(ADDR_SURF_P8_32x32_8x16) |
2601                                   SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_2));
2602                     tile[30] = (ARRAY_MODE(ARRAY_PRT_2D_TILED_THIN1) |
2603                                   MICRO_TILE_MODE_NEW(ADDR_SURF_ROTATED_MICRO_TILING) |
2604                                   PIPE_CONFIG(ADDR_SURF_P8_32x32_16x16) |
2605                                   SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_2));
2606 
2607                     macrotile[0] = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
2608                                         BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_4) |
2609                                         MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_4) |
2610                                         NUM_BANKS(ADDR_SURF_16_BANK));
2611                     macrotile[1] = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
2612                                         BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_2) |
2613                                         MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_2) |
2614                                         NUM_BANKS(ADDR_SURF_16_BANK));
2615                     macrotile[2] = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
2616                                         BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_1) |
2617                                         MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_2) |
2618                                         NUM_BANKS(ADDR_SURF_16_BANK));
2619                     macrotile[3] = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
2620                                         BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_1) |
2621                                         MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_2) |
2622                                         NUM_BANKS(ADDR_SURF_16_BANK));
2623                     macrotile[4] = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
2624                                         BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_1) |
2625                                         MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_1) |
2626                                         NUM_BANKS(ADDR_SURF_8_BANK));
2627                     macrotile[5] = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
2628                                         BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_1) |
2629                                         MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_1) |
2630                                         NUM_BANKS(ADDR_SURF_4_BANK));
2631                     macrotile[6] = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
2632                                         BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_1) |
2633                                         MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_1) |
2634                                         NUM_BANKS(ADDR_SURF_2_BANK));
2635                     macrotile[8] = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
2636                                         BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_8) |
2637                                         MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_4) |
2638                                         NUM_BANKS(ADDR_SURF_16_BANK));
2639                     macrotile[9] = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
2640                                         BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_4) |
2641                                         MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_4) |
2642                                         NUM_BANKS(ADDR_SURF_16_BANK));
2643                     macrotile[10] = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
2644                                         BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_2) |
2645                                         MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_2) |
2646                                         NUM_BANKS(ADDR_SURF_16_BANK));
2647                     macrotile[11] = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
2648                                         BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_1) |
2649                                         MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_2) |
2650                                         NUM_BANKS(ADDR_SURF_16_BANK));
2651                     macrotile[12] = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
2652                                         BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_1) |
2653                                         MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_1) |
2654                                         NUM_BANKS(ADDR_SURF_8_BANK));
2655                     macrotile[13] = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
2656                                         BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_1) |
2657                                         MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_1) |
2658                                         NUM_BANKS(ADDR_SURF_4_BANK));
2659                     macrotile[14] = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
2660                                         BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_1) |
2661                                         MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_1) |
2662                                         NUM_BANKS(ADDR_SURF_2_BANK));
2663 
2664                     for (reg_offset = 0; reg_offset < num_tile_mode_states; reg_offset++)
2665                               WREG32(GB_TILE_MODE0 + (reg_offset * 4), tile[reg_offset]);
2666                     for (reg_offset = 0; reg_offset < num_secondary_tile_mode_states; reg_offset++)
2667                               WREG32(GB_MACROTILE_MODE0 + (reg_offset * 4), macrotile[reg_offset]);
2668                     break;
2669 
2670           case 4:
2671                     if (num_rbs == 4) {
2672                     tile[0] = (ARRAY_MODE(ARRAY_2D_TILED_THIN1) |
2673                                  MICRO_TILE_MODE_NEW(ADDR_SURF_DEPTH_MICRO_TILING) |
2674                                  PIPE_CONFIG(ADDR_SURF_P4_16x16) |
2675                                  TILE_SPLIT(ADDR_SURF_TILE_SPLIT_64B));
2676                     tile[1] = (ARRAY_MODE(ARRAY_2D_TILED_THIN1) |
2677                                  MICRO_TILE_MODE_NEW(ADDR_SURF_DEPTH_MICRO_TILING) |
2678                                  PIPE_CONFIG(ADDR_SURF_P4_16x16) |
2679                                  TILE_SPLIT(ADDR_SURF_TILE_SPLIT_128B));
2680                     tile[2] = (ARRAY_MODE(ARRAY_2D_TILED_THIN1) |
2681                                  MICRO_TILE_MODE_NEW(ADDR_SURF_DEPTH_MICRO_TILING) |
2682                                  PIPE_CONFIG(ADDR_SURF_P4_16x16) |
2683                                  TILE_SPLIT(ADDR_SURF_TILE_SPLIT_256B));
2684                     tile[3] = (ARRAY_MODE(ARRAY_2D_TILED_THIN1) |
2685                                  MICRO_TILE_MODE_NEW(ADDR_SURF_DEPTH_MICRO_TILING) |
2686                                  PIPE_CONFIG(ADDR_SURF_P4_16x16) |
2687                                  TILE_SPLIT(ADDR_SURF_TILE_SPLIT_512B));
2688                     tile[4] = (ARRAY_MODE(ARRAY_2D_TILED_THIN1) |
2689                                  MICRO_TILE_MODE_NEW(ADDR_SURF_DEPTH_MICRO_TILING) |
2690                                  PIPE_CONFIG(ADDR_SURF_P4_16x16) |
2691                                  TILE_SPLIT(split_equal_to_row_size));
2692                     tile[5] = (ARRAY_MODE(ARRAY_1D_TILED_THIN1) |
2693                                  PIPE_CONFIG(ADDR_SURF_P4_16x16) |
2694                                  MICRO_TILE_MODE_NEW(ADDR_SURF_DEPTH_MICRO_TILING));
2695                     tile[6] = (ARRAY_MODE(ARRAY_PRT_2D_TILED_THIN1) |
2696                                  MICRO_TILE_MODE_NEW(ADDR_SURF_DEPTH_MICRO_TILING) |
2697                                  PIPE_CONFIG(ADDR_SURF_P4_16x16) |
2698                                  TILE_SPLIT(ADDR_SURF_TILE_SPLIT_256B));
2699                     tile[7] = (ARRAY_MODE(ARRAY_PRT_2D_TILED_THIN1) |
2700                                  MICRO_TILE_MODE_NEW(ADDR_SURF_DEPTH_MICRO_TILING) |
2701                                  PIPE_CONFIG(ADDR_SURF_P4_16x16) |
2702                                  TILE_SPLIT(split_equal_to_row_size));
2703                     tile[8] = (ARRAY_MODE(ARRAY_LINEAR_ALIGNED) |
2704                                  PIPE_CONFIG(ADDR_SURF_P4_16x16));
2705                     tile[9] = (ARRAY_MODE(ARRAY_1D_TILED_THIN1) |
2706                                  PIPE_CONFIG(ADDR_SURF_P4_16x16) |
2707                                  MICRO_TILE_MODE_NEW(ADDR_SURF_DISPLAY_MICRO_TILING));
2708                     tile[10] = (ARRAY_MODE(ARRAY_2D_TILED_THIN1) |
2709                                   MICRO_TILE_MODE_NEW(ADDR_SURF_DISPLAY_MICRO_TILING) |
2710                                   PIPE_CONFIG(ADDR_SURF_P4_16x16) |
2711                                   SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_2));
2712                     tile[11] = (ARRAY_MODE(ARRAY_PRT_TILED_THIN1) |
2713                                   MICRO_TILE_MODE_NEW(ADDR_SURF_DISPLAY_MICRO_TILING) |
2714                                   PIPE_CONFIG(ADDR_SURF_P4_8x16) |
2715                                   SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_2));
2716                     tile[12] = (ARRAY_MODE(ARRAY_PRT_2D_TILED_THIN1) |
2717                                   MICRO_TILE_MODE_NEW(ADDR_SURF_DISPLAY_MICRO_TILING) |
2718                                   PIPE_CONFIG(ADDR_SURF_P4_16x16) |
2719                                   SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_2));
2720                     tile[13] = (ARRAY_MODE(ARRAY_1D_TILED_THIN1) |
2721                                   PIPE_CONFIG(ADDR_SURF_P4_16x16) |
2722                                   MICRO_TILE_MODE_NEW(ADDR_SURF_THIN_MICRO_TILING));
2723                     tile[14] = (ARRAY_MODE(ARRAY_2D_TILED_THIN1) |
2724                                   MICRO_TILE_MODE_NEW(ADDR_SURF_THIN_MICRO_TILING) |
2725                                   PIPE_CONFIG(ADDR_SURF_P4_16x16) |
2726                                   SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_2));
2727                     tile[16] = (ARRAY_MODE(ARRAY_PRT_TILED_THIN1) |
2728                                   MICRO_TILE_MODE_NEW(ADDR_SURF_THIN_MICRO_TILING) |
2729                                   PIPE_CONFIG(ADDR_SURF_P4_8x16) |
2730                                   SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_2));
2731                     tile[17] = (ARRAY_MODE(ARRAY_PRT_2D_TILED_THIN1) |
2732                                   MICRO_TILE_MODE_NEW(ADDR_SURF_THIN_MICRO_TILING) |
2733                                   PIPE_CONFIG(ADDR_SURF_P4_16x16) |
2734                                   SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_2));
2735                     tile[27] = (ARRAY_MODE(ARRAY_1D_TILED_THIN1) |
2736                                   PIPE_CONFIG(ADDR_SURF_P4_16x16) |
2737                                   MICRO_TILE_MODE_NEW(ADDR_SURF_ROTATED_MICRO_TILING));
2738                     tile[28] = (ARRAY_MODE(ARRAY_PRT_2D_TILED_THIN1) |
2739                                   MICRO_TILE_MODE_NEW(ADDR_SURF_ROTATED_MICRO_TILING) |
2740                                   PIPE_CONFIG(ADDR_SURF_P4_16x16) |
2741                                   SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_2));
2742                     tile[29] = (ARRAY_MODE(ARRAY_PRT_TILED_THIN1) |
2743                                   MICRO_TILE_MODE_NEW(ADDR_SURF_ROTATED_MICRO_TILING) |
2744                                   PIPE_CONFIG(ADDR_SURF_P4_8x16) |
2745                                   SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_2));
2746                     tile[30] = (ARRAY_MODE(ARRAY_PRT_2D_TILED_THIN1) |
2747                                   MICRO_TILE_MODE_NEW(ADDR_SURF_ROTATED_MICRO_TILING) |
2748                                   PIPE_CONFIG(ADDR_SURF_P4_16x16) |
2749                                   SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_2));
2750 
2751                     } else if (num_rbs < 4) {
2752                     tile[0] = (ARRAY_MODE(ARRAY_2D_TILED_THIN1) |
2753                                  MICRO_TILE_MODE_NEW(ADDR_SURF_DEPTH_MICRO_TILING) |
2754                                  PIPE_CONFIG(ADDR_SURF_P4_8x16) |
2755                                  TILE_SPLIT(ADDR_SURF_TILE_SPLIT_64B));
2756                     tile[1] = (ARRAY_MODE(ARRAY_2D_TILED_THIN1) |
2757                                  MICRO_TILE_MODE_NEW(ADDR_SURF_DEPTH_MICRO_TILING) |
2758                                  PIPE_CONFIG(ADDR_SURF_P4_8x16) |
2759                                  TILE_SPLIT(ADDR_SURF_TILE_SPLIT_128B));
2760                     tile[2] = (ARRAY_MODE(ARRAY_2D_TILED_THIN1) |
2761                                  MICRO_TILE_MODE_NEW(ADDR_SURF_DEPTH_MICRO_TILING) |
2762                                  PIPE_CONFIG(ADDR_SURF_P4_8x16) |
2763                                  TILE_SPLIT(ADDR_SURF_TILE_SPLIT_256B));
2764                     tile[3] = (ARRAY_MODE(ARRAY_2D_TILED_THIN1) |
2765                                  MICRO_TILE_MODE_NEW(ADDR_SURF_DEPTH_MICRO_TILING) |
2766                                  PIPE_CONFIG(ADDR_SURF_P4_8x16) |
2767                                  TILE_SPLIT(ADDR_SURF_TILE_SPLIT_512B));
2768                     tile[4] = (ARRAY_MODE(ARRAY_2D_TILED_THIN1) |
2769                                  MICRO_TILE_MODE_NEW(ADDR_SURF_DEPTH_MICRO_TILING) |
2770                                  PIPE_CONFIG(ADDR_SURF_P4_8x16) |
2771                                  TILE_SPLIT(split_equal_to_row_size));
2772                     tile[5] = (ARRAY_MODE(ARRAY_1D_TILED_THIN1) |
2773                                  PIPE_CONFIG(ADDR_SURF_P4_8x16) |
2774                                  MICRO_TILE_MODE_NEW(ADDR_SURF_DEPTH_MICRO_TILING));
2775                     tile[6] = (ARRAY_MODE(ARRAY_PRT_2D_TILED_THIN1) |
2776                                  MICRO_TILE_MODE_NEW(ADDR_SURF_DEPTH_MICRO_TILING) |
2777                                  PIPE_CONFIG(ADDR_SURF_P4_8x16) |
2778                                  TILE_SPLIT(ADDR_SURF_TILE_SPLIT_256B));
2779                     tile[7] = (ARRAY_MODE(ARRAY_PRT_2D_TILED_THIN1) |
2780                                  MICRO_TILE_MODE_NEW(ADDR_SURF_DEPTH_MICRO_TILING) |
2781                                  PIPE_CONFIG(ADDR_SURF_P4_8x16) |
2782                                  TILE_SPLIT(split_equal_to_row_size));
2783                     tile[8] = (ARRAY_MODE(ARRAY_LINEAR_ALIGNED) |
2784                                  PIPE_CONFIG(ADDR_SURF_P4_8x16));
2785                     tile[9] = (ARRAY_MODE(ARRAY_1D_TILED_THIN1) |
2786                                  PIPE_CONFIG(ADDR_SURF_P4_8x16) |
2787                                  MICRO_TILE_MODE_NEW(ADDR_SURF_DISPLAY_MICRO_TILING));
2788                     tile[10] = (ARRAY_MODE(ARRAY_2D_TILED_THIN1) |
2789                                   MICRO_TILE_MODE_NEW(ADDR_SURF_DISPLAY_MICRO_TILING) |
2790                                   PIPE_CONFIG(ADDR_SURF_P4_8x16) |
2791                                   SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_2));
2792                     tile[11] = (ARRAY_MODE(ARRAY_PRT_TILED_THIN1) |
2793                                   MICRO_TILE_MODE_NEW(ADDR_SURF_DISPLAY_MICRO_TILING) |
2794                                   PIPE_CONFIG(ADDR_SURF_P4_8x16) |
2795                                   SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_2));
2796                     tile[12] = (ARRAY_MODE(ARRAY_PRT_2D_TILED_THIN1) |
2797                                   MICRO_TILE_MODE_NEW(ADDR_SURF_DISPLAY_MICRO_TILING) |
2798                                   PIPE_CONFIG(ADDR_SURF_P4_8x16) |
2799                                   SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_2));
2800                     tile[13] = (ARRAY_MODE(ARRAY_1D_TILED_THIN1) |
2801                                   PIPE_CONFIG(ADDR_SURF_P4_8x16) |
2802                                   MICRO_TILE_MODE_NEW(ADDR_SURF_THIN_MICRO_TILING));
2803                     tile[14] = (ARRAY_MODE(ARRAY_2D_TILED_THIN1) |
2804                                   MICRO_TILE_MODE_NEW(ADDR_SURF_THIN_MICRO_TILING) |
2805                                   PIPE_CONFIG(ADDR_SURF_P4_8x16) |
2806                                   SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_2));
2807                     tile[16] = (ARRAY_MODE(ARRAY_PRT_TILED_THIN1) |
2808                                   MICRO_TILE_MODE_NEW(ADDR_SURF_THIN_MICRO_TILING) |
2809                                   PIPE_CONFIG(ADDR_SURF_P4_8x16) |
2810                                   SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_2));
2811                     tile[17] = (ARRAY_MODE(ARRAY_PRT_2D_TILED_THIN1) |
2812                                   MICRO_TILE_MODE_NEW(ADDR_SURF_THIN_MICRO_TILING) |
2813                                   PIPE_CONFIG(ADDR_SURF_P4_8x16) |
2814                                   SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_2));
2815                     tile[27] = (ARRAY_MODE(ARRAY_1D_TILED_THIN1) |
2816                                   PIPE_CONFIG(ADDR_SURF_P4_8x16) |
2817                                   MICRO_TILE_MODE_NEW(ADDR_SURF_ROTATED_MICRO_TILING));
2818                     tile[28] = (ARRAY_MODE(ARRAY_PRT_2D_TILED_THIN1) |
2819                                   MICRO_TILE_MODE_NEW(ADDR_SURF_ROTATED_MICRO_TILING) |
2820                                   PIPE_CONFIG(ADDR_SURF_P4_8x16) |
2821                                   SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_2));
2822                     tile[29] = (ARRAY_MODE(ARRAY_PRT_TILED_THIN1) |
2823                                   MICRO_TILE_MODE_NEW(ADDR_SURF_ROTATED_MICRO_TILING) |
2824                                   PIPE_CONFIG(ADDR_SURF_P4_8x16) |
2825                                   SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_2));
2826                     tile[30] = (ARRAY_MODE(ARRAY_PRT_2D_TILED_THIN1) |
2827                                   MICRO_TILE_MODE_NEW(ADDR_SURF_ROTATED_MICRO_TILING) |
2828                                   PIPE_CONFIG(ADDR_SURF_P4_8x16) |
2829                                   SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_2));
2830                     }
2831 
2832                     macrotile[0] = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
2833                                         BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_4) |
2834                                         MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_4) |
2835                                         NUM_BANKS(ADDR_SURF_16_BANK));
2836                     macrotile[1] = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
2837                                         BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_2) |
2838                                         MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_4) |
2839                                         NUM_BANKS(ADDR_SURF_16_BANK));
2840                     macrotile[2] = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
2841                                         BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_1) |
2842                                         MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_2) |
2843                                         NUM_BANKS(ADDR_SURF_16_BANK));
2844                     macrotile[3] = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
2845                                         BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_1) |
2846                                         MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_2) |
2847                                         NUM_BANKS(ADDR_SURF_16_BANK));
2848                     macrotile[4] = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
2849                                         BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_1) |
2850                                         MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_2) |
2851                                         NUM_BANKS(ADDR_SURF_16_BANK));
2852                     macrotile[5] = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
2853                                         BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_1) |
2854                                         MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_2) |
2855                                         NUM_BANKS(ADDR_SURF_8_BANK));
2856                     macrotile[6] = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
2857                                         BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_1) |
2858                                         MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_1) |
2859                                         NUM_BANKS(ADDR_SURF_4_BANK));
2860                     macrotile[8] = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_2) |
2861                                         BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_8) |
2862                                         MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_4) |
2863                                         NUM_BANKS(ADDR_SURF_16_BANK));
2864                     macrotile[9] = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_2) |
2865                                         BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_4) |
2866                                         MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_4) |
2867                                         NUM_BANKS(ADDR_SURF_16_BANK));
2868                     macrotile[10] = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
2869                                         BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_4) |
2870                                         MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_4) |
2871                                         NUM_BANKS(ADDR_SURF_16_BANK));
2872                     macrotile[11] = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
2873                                         BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_2) |
2874                                         MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_4) |
2875                                         NUM_BANKS(ADDR_SURF_16_BANK));
2876                     macrotile[12] = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
2877                                         BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_1) |
2878                                         MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_2) |
2879                                         NUM_BANKS(ADDR_SURF_16_BANK));
2880                     macrotile[13] = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
2881                                         BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_1) |
2882                                         MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_2) |
2883                                         NUM_BANKS(ADDR_SURF_8_BANK));
2884                     macrotile[14] = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
2885                                         BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_1) |
2886                                         MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_1) |
2887                                         NUM_BANKS(ADDR_SURF_4_BANK));
2888 
2889                     for (reg_offset = 0; reg_offset < num_tile_mode_states; reg_offset++)
2890                               WREG32(GB_TILE_MODE0 + (reg_offset * 4), tile[reg_offset]);
2891                     for (reg_offset = 0; reg_offset < num_secondary_tile_mode_states; reg_offset++)
2892                               WREG32(GB_MACROTILE_MODE0 + (reg_offset * 4), macrotile[reg_offset]);
2893                     break;
2894 
2895           case 2:
2896                     tile[0] = (ARRAY_MODE(ARRAY_2D_TILED_THIN1) |
2897                                  MICRO_TILE_MODE_NEW(ADDR_SURF_DEPTH_MICRO_TILING) |
2898                                  PIPE_CONFIG(ADDR_SURF_P2) |
2899                                  TILE_SPLIT(ADDR_SURF_TILE_SPLIT_64B));
2900                     tile[1] = (ARRAY_MODE(ARRAY_2D_TILED_THIN1) |
2901                                  MICRO_TILE_MODE_NEW(ADDR_SURF_DEPTH_MICRO_TILING) |
2902                                  PIPE_CONFIG(ADDR_SURF_P2) |
2903                                  TILE_SPLIT(ADDR_SURF_TILE_SPLIT_128B));
2904                     tile[2] = (ARRAY_MODE(ARRAY_2D_TILED_THIN1) |
2905                                  MICRO_TILE_MODE_NEW(ADDR_SURF_DEPTH_MICRO_TILING) |
2906                                  PIPE_CONFIG(ADDR_SURF_P2) |
2907                                  TILE_SPLIT(ADDR_SURF_TILE_SPLIT_256B));
2908                     tile[3] = (ARRAY_MODE(ARRAY_2D_TILED_THIN1) |
2909                                  MICRO_TILE_MODE_NEW(ADDR_SURF_DEPTH_MICRO_TILING) |
2910                                  PIPE_CONFIG(ADDR_SURF_P2) |
2911                                  TILE_SPLIT(ADDR_SURF_TILE_SPLIT_512B));
2912                     tile[4] = (ARRAY_MODE(ARRAY_2D_TILED_THIN1) |
2913                                  MICRO_TILE_MODE_NEW(ADDR_SURF_DEPTH_MICRO_TILING) |
2914                                  PIPE_CONFIG(ADDR_SURF_P2) |
2915                                  TILE_SPLIT(split_equal_to_row_size));
2916                     tile[5] = (ARRAY_MODE(ARRAY_1D_TILED_THIN1) |
2917                                  PIPE_CONFIG(ADDR_SURF_P2) |
2918                                  MICRO_TILE_MODE_NEW(ADDR_SURF_DEPTH_MICRO_TILING));
2919                     tile[6] = (ARRAY_MODE(ARRAY_PRT_2D_TILED_THIN1) |
2920                                  MICRO_TILE_MODE_NEW(ADDR_SURF_DEPTH_MICRO_TILING) |
2921                                  PIPE_CONFIG(ADDR_SURF_P2) |
2922                                  TILE_SPLIT(ADDR_SURF_TILE_SPLIT_256B));
2923                     tile[7] = (ARRAY_MODE(ARRAY_PRT_2D_TILED_THIN1) |
2924                                  MICRO_TILE_MODE_NEW(ADDR_SURF_DEPTH_MICRO_TILING) |
2925                                  PIPE_CONFIG(ADDR_SURF_P2) |
2926                                  TILE_SPLIT(split_equal_to_row_size));
2927                     tile[8] = ARRAY_MODE(ARRAY_LINEAR_ALIGNED) |
2928                                  PIPE_CONFIG(ADDR_SURF_P2);
2929                     tile[9] = (ARRAY_MODE(ARRAY_1D_TILED_THIN1) |
2930                                  MICRO_TILE_MODE_NEW(ADDR_SURF_DISPLAY_MICRO_TILING) |
2931                                  PIPE_CONFIG(ADDR_SURF_P2));
2932                     tile[10] = (ARRAY_MODE(ARRAY_2D_TILED_THIN1) |
2933                                   MICRO_TILE_MODE_NEW(ADDR_SURF_DISPLAY_MICRO_TILING) |
2934                                   PIPE_CONFIG(ADDR_SURF_P2) |
2935                                   SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_2));
2936                     tile[11] = (ARRAY_MODE(ARRAY_PRT_TILED_THIN1) |
2937                                   MICRO_TILE_MODE_NEW(ADDR_SURF_DISPLAY_MICRO_TILING) |
2938                                   PIPE_CONFIG(ADDR_SURF_P2) |
2939                                   SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_2));
2940                     tile[12] = (ARRAY_MODE(ARRAY_PRT_2D_TILED_THIN1) |
2941                                   MICRO_TILE_MODE_NEW(ADDR_SURF_DISPLAY_MICRO_TILING) |
2942                                   PIPE_CONFIG(ADDR_SURF_P2) |
2943                                   SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_2));
2944                     tile[13] = (ARRAY_MODE(ARRAY_1D_TILED_THIN1) |
2945                                   PIPE_CONFIG(ADDR_SURF_P2) |
2946                                   MICRO_TILE_MODE_NEW(ADDR_SURF_THIN_MICRO_TILING));
2947                     tile[14] = (ARRAY_MODE(ARRAY_2D_TILED_THIN1) |
2948                                   MICRO_TILE_MODE_NEW(ADDR_SURF_THIN_MICRO_TILING) |
2949                                   PIPE_CONFIG(ADDR_SURF_P2) |
2950                                   SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_2));
2951                     tile[16] = (ARRAY_MODE(ARRAY_PRT_TILED_THIN1) |
2952                                   MICRO_TILE_MODE_NEW(ADDR_SURF_THIN_MICRO_TILING) |
2953                                   PIPE_CONFIG(ADDR_SURF_P2) |
2954                                   SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_2));
2955                     tile[17] = (ARRAY_MODE(ARRAY_PRT_2D_TILED_THIN1) |
2956                                   MICRO_TILE_MODE_NEW(ADDR_SURF_THIN_MICRO_TILING) |
2957                                   PIPE_CONFIG(ADDR_SURF_P2) |
2958                                   SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_2));
2959                     tile[27] = (ARRAY_MODE(ARRAY_1D_TILED_THIN1) |
2960                                   MICRO_TILE_MODE_NEW(ADDR_SURF_ROTATED_MICRO_TILING) |
2961                                   PIPE_CONFIG(ADDR_SURF_P2));
2962                     tile[28] = (ARRAY_MODE(ARRAY_PRT_2D_TILED_THIN1) |
2963                                   MICRO_TILE_MODE_NEW(ADDR_SURF_ROTATED_MICRO_TILING) |
2964                                   PIPE_CONFIG(ADDR_SURF_P2) |
2965                                   SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_2));
2966                     tile[29] = (ARRAY_MODE(ARRAY_PRT_TILED_THIN1) |
2967                                   MICRO_TILE_MODE_NEW(ADDR_SURF_ROTATED_MICRO_TILING) |
2968                                   PIPE_CONFIG(ADDR_SURF_P2) |
2969                                   SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_2));
2970                     tile[30] = (ARRAY_MODE(ARRAY_PRT_2D_TILED_THIN1) |
2971                                   MICRO_TILE_MODE_NEW(ADDR_SURF_ROTATED_MICRO_TILING) |
2972                                   PIPE_CONFIG(ADDR_SURF_P2) |
2973                                   SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_2));
2974 
2975                     macrotile[0] = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_2) |
2976                                         BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_4) |
2977                                         MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_4) |
2978                                         NUM_BANKS(ADDR_SURF_16_BANK));
2979                     macrotile[1] = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_2) |
2980                                         BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_2) |
2981                                         MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_4) |
2982                                         NUM_BANKS(ADDR_SURF_16_BANK));
2983                     macrotile[2] = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
2984                                         BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_2) |
2985                                         MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_4) |
2986                                         NUM_BANKS(ADDR_SURF_16_BANK));
2987                     macrotile[3] = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
2988                                         BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_1) |
2989                                         MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_4) |
2990                                         NUM_BANKS(ADDR_SURF_16_BANK));
2991                     macrotile[4] = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
2992                                         BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_1) |
2993                                         MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_4) |
2994                                         NUM_BANKS(ADDR_SURF_16_BANK));
2995                     macrotile[5] = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
2996                                         BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_1) |
2997                                         MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_4) |
2998                                         NUM_BANKS(ADDR_SURF_16_BANK));
2999                     macrotile[6] = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
3000                                         BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_1) |
3001                                         MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_2) |
3002                                         NUM_BANKS(ADDR_SURF_8_BANK));
3003                     macrotile[8] = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_4) |
3004                                         BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_8) |
3005                                         MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_4) |
3006                                         NUM_BANKS(ADDR_SURF_16_BANK));
3007                     macrotile[9] = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_4) |
3008                                         BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_4) |
3009                                         MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_4) |
3010                                         NUM_BANKS(ADDR_SURF_16_BANK));
3011                     macrotile[10] = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_2) |
3012                                         BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_4) |
3013                                         MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_4) |
3014                                         NUM_BANKS(ADDR_SURF_16_BANK));
3015                     macrotile[11] = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_2) |
3016                                         BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_2) |
3017                                         MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_4) |
3018                                         NUM_BANKS(ADDR_SURF_16_BANK));
3019                     macrotile[12] = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
3020                                         BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_2) |
3021                                         MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_4) |
3022                                         NUM_BANKS(ADDR_SURF_16_BANK));
3023                     macrotile[13] = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
3024                                         BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_1) |
3025                                         MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_4) |
3026                                         NUM_BANKS(ADDR_SURF_16_BANK));
3027                     macrotile[14] = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
3028                                         BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_1) |
3029                                         MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_2) |
3030                                         NUM_BANKS(ADDR_SURF_8_BANK));
3031 
3032                     for (reg_offset = 0; reg_offset < num_tile_mode_states; reg_offset++)
3033                               WREG32(GB_TILE_MODE0 + (reg_offset * 4), tile[reg_offset]);
3034                     for (reg_offset = 0; reg_offset < num_secondary_tile_mode_states; reg_offset++)
3035                               WREG32(GB_MACROTILE_MODE0 + (reg_offset * 4), macrotile[reg_offset]);
3036                     break;
3037 
3038           default:
3039                     DRM_ERROR("unknown num pipe config: 0x%x\n", num_pipe_configs);
3040           }
3041 }
3042 
3043 /**
3044  * cik_select_se_sh - select which SE, SH to address
3045  *
3046  * @rdev: radeon_device pointer
3047  * @se_num: shader engine to address
3048  * @sh_num: sh block to address
3049  *
3050  * Select which SE, SH combinations to address. Certain
3051  * registers are instanced per SE or SH.  0xffffffff means
3052  * broadcast to all SEs or SHs (CIK).
3053  */
cik_select_se_sh(struct radeon_device * rdev,u32 se_num,u32 sh_num)3054 static void cik_select_se_sh(struct radeon_device *rdev,
3055                                    u32 se_num, u32 sh_num)
3056 {
3057           u32 data = INSTANCE_BROADCAST_WRITES;
3058 
3059           if ((se_num == 0xffffffff) && (sh_num == 0xffffffff))
3060                     data |= SH_BROADCAST_WRITES | SE_BROADCAST_WRITES;
3061           else if (se_num == 0xffffffff)
3062                     data |= SE_BROADCAST_WRITES | SH_INDEX(sh_num);
3063           else if (sh_num == 0xffffffff)
3064                     data |= SH_BROADCAST_WRITES | SE_INDEX(se_num);
3065           else
3066                     data |= SH_INDEX(sh_num) | SE_INDEX(se_num);
3067           WREG32(GRBM_GFX_INDEX, data);
3068 }
3069 
/**
 * cik_create_bitmask - create a bitmask
 *
 * @bit_width: length of the mask
 *
 * create a variable length bit mask (CIK) with the low @bit_width
 * bits set.  Widths of 32 or more yield a mask with all 32 bits set.
 * Returns the bitmask.
 */
static u32 cik_create_bitmask(u32 bit_width)
{
	/* Shifting a 32-bit value by 32 or more is undefined; saturate. */
	if (bit_width >= 32)
		return 0xffffffff;

	return ((u32)1 << bit_width) - 1;
}
3088 
3089 /**
3090  * cik_get_rb_disabled - computes the mask of disabled RBs
3091  *
3092  * @rdev: radeon_device pointer
3093  * @max_rb_num: max RBs (render backends) for the asic
3094  * @se_num: number of SEs (shader engines) for the asic
3095  * @sh_per_se: number of SH blocks per SE for the asic
3096  *
3097  * Calculates the bitmask of disabled RBs (CIK).
3098  * Returns the disabled RB bitmask.
3099  */
cik_get_rb_disabled(struct radeon_device * rdev,u32 max_rb_num_per_se,u32 sh_per_se)3100 static u32 cik_get_rb_disabled(struct radeon_device *rdev,
3101                                     u32 max_rb_num_per_se,
3102                                     u32 sh_per_se)
3103 {
3104           u32 data, mask;
3105 
3106           data = RREG32(CC_RB_BACKEND_DISABLE);
3107           if (data & 1)
3108                     data &= BACKEND_DISABLE_MASK;
3109           else
3110                     data = 0;
3111           data |= RREG32(GC_USER_RB_BACKEND_DISABLE);
3112 
3113           data >>= BACKEND_DISABLE_SHIFT;
3114 
3115           mask = cik_create_bitmask(max_rb_num_per_se / sh_per_se);
3116 
3117           return data & mask;
3118 }
3119 
3120 /**
3121  * cik_setup_rb - setup the RBs on the asic
3122  *
3123  * @rdev: radeon_device pointer
3124  * @se_num: number of SEs (shader engines) for the asic
3125  * @sh_per_se: number of SH blocks per SE for the asic
3126  * @max_rb_num: max RBs (render backends) for the asic
3127  *
3128  * Configures per-SE/SH RB registers (CIK).
3129  */
cik_setup_rb(struct radeon_device * rdev,u32 se_num,u32 sh_per_se,u32 max_rb_num_per_se)3130 static void cik_setup_rb(struct radeon_device *rdev,
3131                                u32 se_num, u32 sh_per_se,
3132                                u32 max_rb_num_per_se)
3133 {
3134           int i, j;
3135           u32 data, mask;
3136           u32 disabled_rbs = 0;
3137           u32 enabled_rbs = 0;
3138 
3139           mutex_lock(&rdev->grbm_idx_mutex);
3140           for (i = 0; i < se_num; i++) {
3141                     for (j = 0; j < sh_per_se; j++) {
3142                               cik_select_se_sh(rdev, i, j);
3143                               data = cik_get_rb_disabled(rdev, max_rb_num_per_se, sh_per_se);
3144                               if (rdev->family == CHIP_HAWAII)
3145                                         disabled_rbs |= data << ((i * sh_per_se + j) * HAWAII_RB_BITMAP_WIDTH_PER_SH);
3146                               else
3147                                         disabled_rbs |= data << ((i * sh_per_se + j) * CIK_RB_BITMAP_WIDTH_PER_SH);
3148                     }
3149           }
3150           cik_select_se_sh(rdev, 0xffffffff, 0xffffffff);
3151           mutex_unlock(&rdev->grbm_idx_mutex);
3152 
3153           mask = 1;
3154           for (i = 0; i < max_rb_num_per_se * se_num; i++) {
3155                     if (!(disabled_rbs & mask))
3156                               enabled_rbs |= mask;
3157                     mask <<= 1;
3158           }
3159 
3160           rdev->config.cik.backend_enable_mask = enabled_rbs;
3161 
3162           mutex_lock(&rdev->grbm_idx_mutex);
3163           for (i = 0; i < se_num; i++) {
3164                     cik_select_se_sh(rdev, i, 0xffffffff);
3165                     data = 0;
3166                     for (j = 0; j < sh_per_se; j++) {
3167                               switch (enabled_rbs & 3) {
3168                               case 0:
3169                                         if (j == 0)
3170                                                   data |= PKR_MAP(RASTER_CONFIG_RB_MAP_3);
3171                                         else
3172                                                   data |= PKR_MAP(RASTER_CONFIG_RB_MAP_0);
3173                                         break;
3174                               case 1:
3175                                         data |= (RASTER_CONFIG_RB_MAP_0 << (i * sh_per_se + j) * 2);
3176                                         break;
3177                               case 2:
3178                                         data |= (RASTER_CONFIG_RB_MAP_3 << (i * sh_per_se + j) * 2);
3179                                         break;
3180                               case 3:
3181                               default:
3182                                         data |= (RASTER_CONFIG_RB_MAP_2 << (i * sh_per_se + j) * 2);
3183                                         break;
3184                               }
3185                               enabled_rbs >>= 2;
3186                     }
3187                     WREG32(PA_SC_RASTER_CONFIG, data);
3188           }
3189           cik_select_se_sh(rdev, 0xffffffff, 0xffffffff);
3190           mutex_unlock(&rdev->grbm_idx_mutex);
3191 }
3192 
3193 /**
3194  * cik_gpu_init - setup the 3D engine
3195  *
3196  * @rdev: radeon_device pointer
3197  *
3198  * Configures the 3D engine and tiling configuration
3199  * registers so that the 3D engine is usable.
3200  */
cik_gpu_init(struct radeon_device * rdev)3201 static void cik_gpu_init(struct radeon_device *rdev)
3202 {
               /* Start from the current register value; every case of the family
                * switch below replaces it with a golden value.
                */
3203           u32 gb_addr_config = RREG32(GB_ADDR_CONFIG);
3204           u32 mc_shared_chmap, mc_arb_ramcfg;
3205           u32 hdp_host_path_cntl;
3206           u32 tmp;
3207           int i, j;
3208 
               /* Per-family limits (shader engines, pipes, CUs, backends, ...)
                * and scan converter FIFO sizes.  Families without their own
                * case share the KABINI/MULLINS settings via the default label.
                */
3209           switch (rdev->family) {
3210           case CHIP_BONAIRE:
3211                     rdev->config.cik.max_shader_engines = 2;
3212                     rdev->config.cik.max_tile_pipes = 4;
3213                     rdev->config.cik.max_cu_per_sh = 7;
3214                     rdev->config.cik.max_sh_per_se = 1;
3215                     rdev->config.cik.max_backends_per_se = 2;
3216                     rdev->config.cik.max_texture_channel_caches = 4;
3217                     rdev->config.cik.max_gprs = 256;
3218                     rdev->config.cik.max_gs_threads = 32;
3219                     rdev->config.cik.max_hw_contexts = 8;
3220 
3221                     rdev->config.cik.sc_prim_fifo_size_frontend = 0x20;
3222                     rdev->config.cik.sc_prim_fifo_size_backend = 0x100;
3223                     rdev->config.cik.sc_hiz_tile_fifo_size = 0x30;
3224                     rdev->config.cik.sc_earlyz_tile_fifo_size = 0x130;
3225                     gb_addr_config = BONAIRE_GB_ADDR_CONFIG_GOLDEN;
3226                     break;
3227           case CHIP_HAWAII:
3228                     rdev->config.cik.max_shader_engines = 4;
3229                     rdev->config.cik.max_tile_pipes = 16;
3230                     rdev->config.cik.max_cu_per_sh = 11;
3231                     rdev->config.cik.max_sh_per_se = 1;
3232                     rdev->config.cik.max_backends_per_se = 4;
3233                     rdev->config.cik.max_texture_channel_caches = 16;
3234                     rdev->config.cik.max_gprs = 256;
3235                     rdev->config.cik.max_gs_threads = 32;
3236                     rdev->config.cik.max_hw_contexts = 8;
3237 
3238                     rdev->config.cik.sc_prim_fifo_size_frontend = 0x20;
3239                     rdev->config.cik.sc_prim_fifo_size_backend = 0x100;
3240                     rdev->config.cik.sc_hiz_tile_fifo_size = 0x30;
3241                     rdev->config.cik.sc_earlyz_tile_fifo_size = 0x130;
3242                     gb_addr_config = HAWAII_GB_ADDR_CONFIG_GOLDEN;
3243                     break;
3244           case CHIP_KAVERI:
3245                     rdev->config.cik.max_shader_engines = 1;
3246                     rdev->config.cik.max_tile_pipes = 4;
3247                     rdev->config.cik.max_cu_per_sh = 8;
3248                     rdev->config.cik.max_backends_per_se = 2;
3249                     rdev->config.cik.max_sh_per_se = 1;
3250                     rdev->config.cik.max_texture_channel_caches = 4;
3251                     rdev->config.cik.max_gprs = 256;
3252                     rdev->config.cik.max_gs_threads = 16;
3253                     rdev->config.cik.max_hw_contexts = 8;
3254 
3255                     rdev->config.cik.sc_prim_fifo_size_frontend = 0x20;
3256                     rdev->config.cik.sc_prim_fifo_size_backend = 0x100;
3257                     rdev->config.cik.sc_hiz_tile_fifo_size = 0x30;
3258                     rdev->config.cik.sc_earlyz_tile_fifo_size = 0x130;
3259                     gb_addr_config = BONAIRE_GB_ADDR_CONFIG_GOLDEN;
3260                     break;
3261           case CHIP_KABINI:
3262           case CHIP_MULLINS:
3263           default:
3264                     rdev->config.cik.max_shader_engines = 1;
3265                     rdev->config.cik.max_tile_pipes = 2;
3266                     rdev->config.cik.max_cu_per_sh = 2;
3267                     rdev->config.cik.max_sh_per_se = 1;
3268                     rdev->config.cik.max_backends_per_se = 1;
3269                     rdev->config.cik.max_texture_channel_caches = 2;
3270                     rdev->config.cik.max_gprs = 256;
3271                     rdev->config.cik.max_gs_threads = 16;
3272                     rdev->config.cik.max_hw_contexts = 8;
3273 
3274                     rdev->config.cik.sc_prim_fifo_size_frontend = 0x20;
3275                     rdev->config.cik.sc_prim_fifo_size_backend = 0x100;
3276                     rdev->config.cik.sc_hiz_tile_fifo_size = 0x30;
3277                     rdev->config.cik.sc_earlyz_tile_fifo_size = 0x130;
3278                     gb_addr_config = BONAIRE_GB_ADDR_CONFIG_GOLDEN;
3279                     break;
3280           }
3281 
               /* Zero five consecutive dwords in each of 32 register groups
                * that start at 0x2c14 and are spaced 0x18 bytes apart.
                */
3282           /* Initialize HDP */
3283           for (i = 0, j = 0; i < 32; i++, j += 0x18) {
3284                     WREG32((0x2c14 + j), 0x00000000);
3285                     WREG32((0x2c18 + j), 0x00000000);
3286                     WREG32((0x2c1c + j), 0x00000000);
3287                     WREG32((0x2c20 + j), 0x00000000);
3288                     WREG32((0x2c24 + j), 0x00000000);
3289           }
3290 
3291           WREG32(GRBM_CNTL, GRBM_READ_TIMEOUT(0xff));
3292           WREG32(SRBM_INT_CNTL, 0x1);
3293           WREG32(SRBM_INT_ACK, 0x1);
3294 
3295           WREG32(BIF_FB_EN, FB_READ_EN | FB_WRITE_EN);
3296 
               /* mc_shared_chmap is read here but not used again in this function. */
3297           mc_shared_chmap = RREG32(MC_SHARED_CHMAP);
3298           mc_arb_ramcfg = RREG32(MC_ARB_RAMCFG);
3299 
3300           rdev->config.cik.num_tile_pipes = rdev->config.cik.max_tile_pipes;
3301           rdev->config.cik.mem_max_burst_length_bytes = 256;
               /* Row size in KB is derived from the NOOFCOLS field of
                * MC_ARB_RAMCFG and then capped at 4.
                */
3302           tmp = (mc_arb_ramcfg & NOOFCOLS_MASK) >> NOOFCOLS_SHIFT;
3303           rdev->config.cik.mem_row_size_in_kb = (4 * (1 << (8 + tmp))) / 1024;
3304           if (rdev->config.cik.mem_row_size_in_kb > 4)
3305                     rdev->config.cik.mem_row_size_in_kb = 4;
3306           /* XXX use MC settings? */
3307           rdev->config.cik.shader_engine_tile_size = 32;
3308           rdev->config.cik.num_gpus = 1;
3309           rdev->config.cik.multi_gpu_tile_size = 64;
3310 
               /* Replace the ROW_SIZE field of the golden value with an encoding
                * of the computed row size: 1 KB (or anything unexpected) -> 0,
                * 2 KB -> 1, 4 KB -> 2.
                */
3311           /* fix up row size */
3312           gb_addr_config &= ~ROW_SIZE_MASK;
3313           switch (rdev->config.cik.mem_row_size_in_kb) {
3314           case 1:
3315           default:
3316                     gb_addr_config |= ROW_SIZE(0);
3317                     break;
3318           case 2:
3319                     gb_addr_config |= ROW_SIZE(1);
3320                     break;
3321           case 4:
3322                     gb_addr_config |= ROW_SIZE(2);
3323                     break;
3324           }
3325 
3326           /* setup tiling info dword.  gb_addr_config is not adequate since it does
3327            * not have bank info, so create a custom tiling dword.
3328            * bits 3:0   num_pipes
3329            * bits 7:4   num_banks
3330            * bits 11:8  group_size
3331            * bits 15:12 row_size
3332            */
3333           rdev->config.cik.tile_config = 0;
               /* Pipe counts of 1, 2 and 4 encode as 0, 1 and 2; every other
                * count (including 8) encodes as 3.
                */
3334           switch (rdev->config.cik.num_tile_pipes) {
3335           case 1:
3336                     rdev->config.cik.tile_config |= (0 << 0);
3337                     break;
3338           case 2:
3339                     rdev->config.cik.tile_config |= (1 << 0);
3340                     break;
3341           case 4:
3342                     rdev->config.cik.tile_config |= (2 << 0);
3343                     break;
3344           case 8:
3345           default:
3346                     /* XXX what about 12? */
3347                     rdev->config.cik.tile_config |= (3 << 0);
3348                     break;
3349           }
3350           rdev->config.cik.tile_config |=
3351                     ((mc_arb_ramcfg & NOOFBANK_MASK) >> NOOFBANK_SHIFT) << 4;
3352           rdev->config.cik.tile_config |=
3353                     ((gb_addr_config & PIPE_INTERLEAVE_SIZE_MASK) >> PIPE_INTERLEAVE_SIZE_SHIFT) << 8;
3354           rdev->config.cik.tile_config |=
3355                     ((gb_addr_config & ROW_SIZE_MASK) >> ROW_SIZE_SHIFT) << 12;
3356 
               /* Write the final address config to each register below; the two
                * SDMA instances only receive the bits selected by mask 0x70.
                */
3357           WREG32(GB_ADDR_CONFIG, gb_addr_config);
3358           WREG32(HDP_ADDR_CONFIG, gb_addr_config);
3359           WREG32(DMIF_ADDR_CALC, gb_addr_config);
3360           WREG32(SDMA0_TILING_CONFIG + SDMA0_REGISTER_OFFSET, gb_addr_config & 0x70);
3361           WREG32(SDMA0_TILING_CONFIG + SDMA1_REGISTER_OFFSET, gb_addr_config & 0x70);
3362           WREG32(UVD_UDEC_ADDR_CONFIG, gb_addr_config);
3363           WREG32(UVD_UDEC_DB_ADDR_CONFIG, gb_addr_config);
3364           WREG32(UVD_UDEC_DBW_ADDR_CONFIG, gb_addr_config);
3365 
3366           cik_tiling_mode_table_init(rdev);
3367 
3368           cik_setup_rb(rdev, rdev->config.cik.max_shader_engines,
3369                          rdev->config.cik.max_sh_per_se,
3370                          rdev->config.cik.max_backends_per_se);
3371 
               /* Total active CUs = sum of set bits in the active-CU bitmap of
                * every (shader engine, shader array) pair.
                */
3372           rdev->config.cik.active_cus = 0;
3373           for (i = 0; i < rdev->config.cik.max_shader_engines; i++) {
3374                     for (j = 0; j < rdev->config.cik.max_sh_per_se; j++) {
3375                               rdev->config.cik.active_cus +=
3376                                         hweight32(cik_get_cu_active_bitmap(rdev, i, j));
3377                     }
3378           }
3379 
3380           /* set HW defaults for 3D engine */
3381           WREG32(CP_MEQ_THRESHOLDS, MEQ1_START(0x30) | MEQ2_START(0x60));
3382 
               /* grbm_idx_mutex is held across the SE/SH selection and all
                * register writes up to the unlock near the end of the function.
                */
3383           mutex_lock(&rdev->grbm_idx_mutex);
3384           /*
3385            * making sure that the following register writes will be broadcasted
3386            * to all the shaders
3387            */
3388           cik_select_se_sh(rdev, 0xffffffff, 0xffffffff);
3389           WREG32(SX_DEBUG_1, 0x20);
3390 
3391           WREG32(TA_CNTL_AUX, 0x00010000);
3392 
3393           tmp = RREG32(SPI_CONFIG_CNTL);
3394           tmp |= 0x03000000;
3395           WREG32(SPI_CONFIG_CNTL, tmp);
3396 
               /* NOTE(review): SQ_CONFIG is written with 1 here and with 0
                * further down; confirm the sequence is intentional.
                */
3397           WREG32(SQ_CONFIG, 1);
3398 
3399           WREG32(DB_DEBUG, 0);
3400 
               /* Read-modify-write: clear the masked bits, then set the new ones. */
3401           tmp = RREG32(DB_DEBUG2) & ~0xf00fffff;
3402           tmp |= 0x00000400;
3403           WREG32(DB_DEBUG2, tmp);
3404 
3405           tmp = RREG32(DB_DEBUG3) & ~0x0002021c;
3406           tmp |= 0x00020200;
3407           WREG32(DB_DEBUG3, tmp);
3408 
3409           tmp = RREG32(CB_HW_CONTROL) & ~0x00010000;
3410           tmp |= 0x00018208;
3411           WREG32(CB_HW_CONTROL, tmp);
3412 
3413           WREG32(SPI_CONFIG_CNTL_1, VTX_DONE_DELAY(4));
3414 
               /* FIFO sizes come from the per-family values chosen above. */
3415           WREG32(PA_SC_FIFO_SIZE, (SC_FRONTEND_PRIM_FIFO_SIZE(rdev->config.cik.sc_prim_fifo_size_frontend) |
3416                                          SC_BACKEND_PRIM_FIFO_SIZE(rdev->config.cik.sc_prim_fifo_size_backend) |
3417                                          SC_HIZ_TILE_FIFO_SIZE(rdev->config.cik.sc_hiz_tile_fifo_size) |
3418                                          SC_EARLYZ_TILE_FIFO_SIZE(rdev->config.cik.sc_earlyz_tile_fifo_size)));
3419 
3420           WREG32(VGT_NUM_INSTANCES, 1);
3421 
3422           WREG32(CP_PERFMON_CNTL, 0);
3423 
3424           WREG32(SQ_CONFIG, 0);
3425 
3426           WREG32(PA_SC_FORCE_EOV_MAX_CNTS, (FORCE_EOV_MAX_CLK_CNT(4095) |
3427                                                     FORCE_EOV_MAX_REZ_CNT(255)));
3428 
3429           WREG32(VGT_CACHE_INVALIDATION, CACHE_INVALIDATION(VC_AND_TC) |
3430                  AUTO_INVLD_EN(ES_AND_GS_AUTO));
3431 
3432           WREG32(VGT_GS_VERTEX_REUSE, 16);
3433           WREG32(PA_SC_LINE_STIPPLE_STATE, 0);
3434 
3435           tmp = RREG32(HDP_MISC_CNTL);
3436           tmp |= HDP_FLUSH_INVALIDATE_CACHE;
3437           WREG32(HDP_MISC_CNTL, tmp);
3438 
               /* The value read from HDP_HOST_PATH_CNTL is written back unmodified. */
3439           hdp_host_path_cntl = RREG32(HDP_HOST_PATH_CNTL);
3440           WREG32(HDP_HOST_PATH_CNTL, hdp_host_path_cntl);
3441 
3442           WREG32(PA_CL_ENHANCE, CLIP_VTX_REORDER_ENA | NUM_CLIP_SEQ(3));
3443           WREG32(PA_SC_ENHANCE, ENABLE_PA_SC_OUT_OF_ORDER);
3444           mutex_unlock(&rdev->grbm_idx_mutex);
3445 
               /* Fixed 50 microsecond delay before returning. */
3446           udelay(50);
3447 }
3448 
3449 /*
3450  * GPU scratch registers helpers function.
3451  */
3452 /**
3453  * cik_scratch_init - setup driver info for CP scratch regs
3454  *
3455  * @rdev: radeon_device pointer
3456  *
3457  * Set up the number and offset of the CP scratch registers.
3458  * NOTE: use of CP scratch registers is a legacy interface and
3459  * is not used by default on newer asics (r6xx+).  On newer asics,
3460  * memory buffers are used for fences rather than scratch regs.
3461  */
cik_scratch_init(struct radeon_device * rdev)3462 static void cik_scratch_init(struct radeon_device *rdev)
3463 {
3464           int i;
3465 
3466           rdev->scratch.num_reg = 7;
3467           rdev->scratch.reg_base = SCRATCH_REG0;
3468           for (i = 0; i < rdev->scratch.num_reg; i++) {
3469                     rdev->scratch.free[i] = true;
3470                     rdev->scratch.reg[i] = rdev->scratch.reg_base + (i * 4);
3471           }
3472 }
3473 
3474 /**
3475  * cik_ring_test - basic gfx ring test
3476  *
3477  * @rdev: radeon_device pointer
3478  * @ring: radeon_ring structure holding ring information
3479  *
3480  * Allocate a scratch register and write to it using the gfx ring (CIK).
3481  * Provides a basic gfx ring test to verify that the ring is working.
3482  * Used by cik_cp_gfx_resume();
3483  * Returns 0 on success, error on failure.
3484  */
cik_ring_test(struct radeon_device * rdev,struct radeon_ring * ring)3485 int cik_ring_test(struct radeon_device *rdev, struct radeon_ring *ring)
3486 {
3487           uint32_t scratch;
3488           uint32_t tmp = 0;
3489           unsigned i;
3490           int r;
3491 
3492           r = radeon_scratch_get(rdev, &scratch);
3493           if (r) {
3494                     DRM_ERROR("radeon: cp failed to get scratch reg (%d).\n", r);
3495                     return r;
3496           }
3497           WREG32(scratch, 0xCAFEDEAD);
3498           r = radeon_ring_lock(rdev, ring, 3);
3499           if (r) {
3500                     DRM_ERROR("radeon: cp failed to lock ring %d (%d).\n", ring->idx, r);
3501                     radeon_scratch_free(rdev, scratch);
3502                     return r;
3503           }
3504           radeon_ring_write(ring, PACKET3(PACKET3_SET_UCONFIG_REG, 1));
3505           radeon_ring_write(ring, ((scratch - PACKET3_SET_UCONFIG_REG_START) >> 2));
3506           radeon_ring_write(ring, 0xDEADBEEF);
3507           radeon_ring_unlock_commit(rdev, ring, false);
3508 
3509           for (i = 0; i < rdev->usec_timeout; i++) {
3510                     tmp = RREG32(scratch);
3511                     if (tmp == 0xDEADBEEF)
3512                               break;
3513                     DRM_UDELAY(1);
3514           }
3515           if (i < rdev->usec_timeout) {
3516                     DRM_INFO("ring test on %d succeeded in %d usecs\n", ring->idx, i);
3517           } else {
3518                     DRM_ERROR("radeon: ring %d test failed (scratch(0x%04X)=0x%08X)\n",
3519                                 ring->idx, scratch, tmp);
3520                     r = -EINVAL;
3521           }
3522           radeon_scratch_free(rdev, scratch);
3523           return r;
3524 }
3525 
3526 /**
3527  * cik_hdp_flush_cp_ring_emit - emit an hdp flush on the cp
3528  *
3529  * @rdev: radeon_device pointer
3530  * @ridx: radeon ring index
3531  *
3532  * Emits an hdp flush on the cp.
3533  */
cik_hdp_flush_cp_ring_emit(struct radeon_device * rdev,int ridx)3534 static void cik_hdp_flush_cp_ring_emit(struct radeon_device *rdev,
3535                                                int ridx)
3536 {
3537           struct radeon_ring *ring = &rdev->ring[ridx];
3538           u32 ref_and_mask;
3539 
3540           switch (ring->idx) {
3541           case CAYMAN_RING_TYPE_CP1_INDEX:
3542           case CAYMAN_RING_TYPE_CP2_INDEX:
3543           default:
3544                     switch (ring->me) {
3545                     case 0:
3546                               ref_and_mask = CP2 << ring->pipe;
3547                               break;
3548                     case 1:
3549                               ref_and_mask = CP6 << ring->pipe;
3550                               break;
3551                     default:
3552                               return;
3553                     }
3554                     break;
3555           case RADEON_RING_TYPE_GFX_INDEX:
3556                     ref_and_mask = CP0;
3557                     break;
3558           }
3559 
3560           radeon_ring_write(ring, PACKET3(PACKET3_WAIT_REG_MEM, 5));
3561           radeon_ring_write(ring, (WAIT_REG_MEM_OPERATION(1) | /* write, wait, write */
3562                                          WAIT_REG_MEM_FUNCTION(3) |  /* == */
3563                                          WAIT_REG_MEM_ENGINE(1)));   /* pfp */
3564           radeon_ring_write(ring, GPU_HDP_FLUSH_REQ >> 2);
3565           radeon_ring_write(ring, GPU_HDP_FLUSH_DONE >> 2);
3566           radeon_ring_write(ring, ref_and_mask);
3567           radeon_ring_write(ring, ref_and_mask);
3568           radeon_ring_write(ring, 0x20); /* poll interval */
3569 }
3570 
3571 /**
3572  * cik_fence_gfx_ring_emit - emit a fence on the gfx ring
3573  *
3574  * @rdev: radeon_device pointer
3575  * @fence: radeon fence object
3576  *
3577  * Emits a fence sequence number on the gfx ring and flushes
3578  * GPU caches.
3579  */
cik_fence_gfx_ring_emit(struct radeon_device * rdev,struct radeon_fence * fence)3580 void cik_fence_gfx_ring_emit(struct radeon_device *rdev,
3581                                    struct radeon_fence *fence)
3582 {
3583           struct radeon_ring *ring = &rdev->ring[fence->ring];
3584           u64 addr = rdev->fence_drv[fence->ring].gpu_addr;
3585 
3586           /* Workaround for cache flush problems. First send a dummy EOP
3587            * event down the pipe with seq one below.
3588            */
3589           radeon_ring_write(ring, PACKET3(PACKET3_EVENT_WRITE_EOP, 4));
3590           radeon_ring_write(ring, (EOP_TCL1_ACTION_EN |
3591                                          EOP_TC_ACTION_EN |
3592                                          EVENT_TYPE(CACHE_FLUSH_AND_INV_TS_EVENT) |
3593                                          EVENT_INDEX(5)));
3594           radeon_ring_write(ring, addr & 0xfffffffc);
3595           radeon_ring_write(ring, (upper_32_bits(addr) & 0xffff) |
3596                                         DATA_SEL(1) | INT_SEL(0));
3597           radeon_ring_write(ring, fence->seq - 1);
3598           radeon_ring_write(ring, 0);
3599 
3600           /* Then send the real EOP event down the pipe. */
3601           radeon_ring_write(ring, PACKET3(PACKET3_EVENT_WRITE_EOP, 4));
3602           radeon_ring_write(ring, (EOP_TCL1_ACTION_EN |
3603                                          EOP_TC_ACTION_EN |
3604                                          EVENT_TYPE(CACHE_FLUSH_AND_INV_TS_EVENT) |
3605                                          EVENT_INDEX(5)));
3606           radeon_ring_write(ring, addr & 0xfffffffc);
3607           radeon_ring_write(ring, (upper_32_bits(addr) & 0xffff) | DATA_SEL(1) | INT_SEL(2));
3608           radeon_ring_write(ring, fence->seq);
3609           radeon_ring_write(ring, 0);
3610 }
3611 
3612 /**
3613  * cik_fence_compute_ring_emit - emit a fence on the compute ring
3614  *
3615  * @rdev: radeon_device pointer
3616  * @fence: radeon fence object
3617  *
3618  * Emits a fence sequence number on the compute ring and flushes
3619  * GPU caches.
3620  */
cik_fence_compute_ring_emit(struct radeon_device * rdev,struct radeon_fence * fence)3621 void cik_fence_compute_ring_emit(struct radeon_device *rdev,
3622                                          struct radeon_fence *fence)
3623 {
3624           struct radeon_ring *ring = &rdev->ring[fence->ring];
3625           u64 addr = rdev->fence_drv[fence->ring].gpu_addr;
3626 
3627           /* RELEASE_MEM - flush caches, send int */
3628           radeon_ring_write(ring, PACKET3(PACKET3_RELEASE_MEM, 5));
3629           radeon_ring_write(ring, (EOP_TCL1_ACTION_EN |
3630                                          EOP_TC_ACTION_EN |
3631                                          EVENT_TYPE(CACHE_FLUSH_AND_INV_TS_EVENT) |
3632                                          EVENT_INDEX(5)));
3633           radeon_ring_write(ring, DATA_SEL(1) | INT_SEL(2));
3634           radeon_ring_write(ring, addr & 0xfffffffc);
3635           radeon_ring_write(ring, upper_32_bits(addr));
3636           radeon_ring_write(ring, fence->seq);
3637           radeon_ring_write(ring, 0);
3638 }
3639 
3640 /**
3641  * cik_semaphore_ring_emit - emit a semaphore on the CP ring
3642  *
3643  * @rdev: radeon_device pointer
3644  * @ring: radeon ring buffer object
3645  * @semaphore: radeon semaphore object
3646  * @emit_wait: Is this a semaphore wait?
3647  *
3648  * Emits a semaphore signal/wait packet to the CP ring and prevents the PFP
3649  * from running ahead of semaphore waits.
3650  */
bool cik_semaphore_ring_emit(struct radeon_device *rdev,
			     struct radeon_ring *ring,
			     struct radeon_semaphore *semaphore,
			     bool emit_wait)
{
	uint64_t sem_addr = semaphore->gpu_addr;
	unsigned sel;

	/* Select between a wait and a signal operation. */
	if (emit_wait)
		sel = PACKET3_SEM_SEL_WAIT;
	else
		sel = PACKET3_SEM_SEL_SIGNAL;

	radeon_ring_write(ring, PACKET3(PACKET3_MEM_SEMAPHORE, 1));
	radeon_ring_write(ring, lower_32_bits(sem_addr));
	radeon_ring_write(ring, (upper_32_bits(sem_addr) & 0xffff) | sel);

	/* Only a wait on the gfx ring needs the extra sync packet. */
	if (!emit_wait || ring->idx != RADEON_RING_TYPE_GFX_INDEX)
		return true;

	/* Prevent the PFP from running ahead of the semaphore wait */
	radeon_ring_write(ring, PACKET3(PACKET3_PFP_SYNC_ME, 0));
	radeon_ring_write(ring, 0x0);

	return true;
}
3671 
3672 /**
3673  * cik_copy_cpdma - copy pages using the CP DMA engine
3674  *
3675  * @rdev: radeon_device pointer
3676  * @src_offset: src GPU address
3677  * @dst_offset: dst GPU address
3678  * @num_gpu_pages: number of GPU pages to xfer
3679  * @resv: reservation object to sync to
3680  *
3681  * Copy GPU paging using the CP DMA engine (CIK+).
3682  * Used by the radeon ttm implementation to move pages if
3683  * registered as the asic copy callback.
3684  */
struct radeon_fence *cik_copy_cpdma(struct radeon_device *rdev,
				    uint64_t src_offset, uint64_t dst_offset,
				    unsigned num_gpu_pages,
				    struct reservation_object *resv)
{
	int ring_index = rdev->asic->copy.blit_ring_index;
	struct radeon_ring *ring = &rdev->ring[ring_index];
	struct radeon_fence *fence;
	struct radeon_sync sync;
	u32 bytes_left, chunk, dma_cntl;
	int pass, num_loops;
	int r = 0;

	radeon_sync_create(&sync);

	/* Each DMA_DATA packet moves at most 0x1fffff bytes and takes 7 dwords. */
	bytes_left = (num_gpu_pages << RADEON_GPU_PAGE_SHIFT);
	num_loops = DIV_ROUND_UP(bytes_left, 0x1fffff);
	r = radeon_ring_lock(rdev, ring, num_loops * 7 + 18);
	if (r) {
		DRM_ERROR("radeon: moving bo (%d).\n", r);
		goto err_free_sync;
	}

	radeon_sync_resv(rdev, &sync, resv, false);
	radeon_sync_rings(rdev, &sync, ring->idx);

	for (pass = 0; pass < num_loops; pass++) {
		chunk = (bytes_left > 0x1fffff) ? 0x1fffff : bytes_left;
		bytes_left -= chunk;

		/* Only the packet that finishes the copy requests a CP sync. */
		dma_cntl = (bytes_left == 0) ? PACKET3_DMA_DATA_CP_SYNC : 0;

		radeon_ring_write(ring, PACKET3(PACKET3_DMA_DATA, 5));
		radeon_ring_write(ring, dma_cntl);
		radeon_ring_write(ring, lower_32_bits(src_offset));
		radeon_ring_write(ring, upper_32_bits(src_offset));
		radeon_ring_write(ring, lower_32_bits(dst_offset));
		radeon_ring_write(ring, upper_32_bits(dst_offset));
		radeon_ring_write(ring, chunk);

		src_offset += chunk;
		dst_offset += chunk;
	}

	r = radeon_fence_emit(rdev, &fence, ring->idx);
	if (r) {
		/* Discard everything queued on the ring since the lock. */
		radeon_ring_unlock_undo(rdev, ring);
		goto err_free_sync;
	}

	radeon_ring_unlock_commit(rdev, ring, false);
	radeon_sync_free(rdev, &sync, fence);

	return fence;

err_free_sync:
	radeon_sync_free(rdev, &sync, NULL);
	return ERR_PTR(r);
}
3743 
3744 /*
3745  * IB stuff
3746  */
3747 /**
3748  * cik_ring_ib_execute - emit an IB (Indirect Buffer) on the gfx ring
3749  *
3750  * @rdev: radeon_device pointer
3751  * @ib: radeon indirect buffer object
3752  *
3753  * Emits a DE (drawing engine) or CE (constant engine) IB
3754  * on the gfx ring.  IBs are usually generated by userspace
3755  * acceleration drivers and submitted to the kernel for
3756  * scheduling on the ring.  This function schedules the IB
3757  * on the gfx ring for execution by the GPU.
3758  */
cik_ring_ib_execute(struct radeon_device * rdev,struct radeon_ib * ib)3759 void cik_ring_ib_execute(struct radeon_device *rdev, struct radeon_ib *ib)
3760 {
3761           struct radeon_ring *ring = &rdev->ring[ib->ring];
3762           unsigned vm_id = ib->vm ? ib->vm->ids[ib->ring].id : 0;
3763           u32 header, control = INDIRECT_BUFFER_VALID;
3764 
3765           if (ib->is_const_ib) {
3766                     /* set switch buffer packet before const IB */
3767                     radeon_ring_write(ring, PACKET3(PACKET3_SWITCH_BUFFER, 0));
3768                     radeon_ring_write(ring, 0);
3769 
3770                     header = PACKET3(PACKET3_INDIRECT_BUFFER_CONST, 2);
3771           } else {
3772                     u32 next_rptr;
3773                     if (ring->rptr_save_reg) {
3774                               next_rptr = ring->wptr + 3 + 4;
3775                               radeon_ring_write(ring, PACKET3(PACKET3_SET_UCONFIG_REG, 1));
3776                               radeon_ring_write(ring, ((ring->rptr_save_reg -
3777                                                               PACKET3_SET_UCONFIG_REG_START) >> 2));
3778                               radeon_ring_write(ring, next_rptr);
3779                     } else if (rdev->wb.enabled) {
3780                               next_rptr = ring->wptr + 5 + 4;
3781                               radeon_ring_write(ring, PACKET3(PACKET3_WRITE_DATA, 3));
3782                               radeon_ring_write(ring, WRITE_DATA_DST_SEL(1));
3783                               radeon_ring_write(ring, ring->next_rptr_gpu_addr & 0xfffffffc);
3784                               radeon_ring_write(ring, upper_32_bits(ring->next_rptr_gpu_addr));
3785                               radeon_ring_write(ring, next_rptr);
3786                     }
3787 
3788                     header = PACKET3(PACKET3_INDIRECT_BUFFER, 2);
3789           }
3790 
3791           control |= ib->length_dw | (vm_id << 24);
3792 
3793           radeon_ring_write(ring, header);
3794           radeon_ring_write(ring, (ib->gpu_addr & 0xFFFFFFFC));
3795           radeon_ring_write(ring, upper_32_bits(ib->gpu_addr) & 0xFFFF);
3796           radeon_ring_write(ring, control);
3797 }
3798 
3799 /**
3800  * cik_ib_test - basic gfx ring IB test
3801  *
3802  * @rdev: radeon_device pointer
3803  * @ring: radeon_ring structure holding ring information
3804  *
3805  * Allocate an IB and execute it on the gfx ring (CIK).
3806  * Provides a basic gfx ring test to verify that IBs are working.
3807  * Returns 0 on success, error on failure.
3808  */
cik_ib_test(struct radeon_device * rdev,struct radeon_ring * ring)3809 int cik_ib_test(struct radeon_device *rdev, struct radeon_ring *ring)
3810 {
3811           struct radeon_ib ib;
3812           uint32_t scratch;
3813           uint32_t tmp = 0;
3814           unsigned i;
3815           int r;
3816 
3817           r = radeon_scratch_get(rdev, &scratch);
3818           if (r) {
3819                     DRM_ERROR("radeon: failed to get scratch reg (%d).\n", r);
3820                     return r;
3821           }
3822           WREG32(scratch, 0xCAFEDEAD);
3823           r = radeon_ib_get(rdev, ring->idx, &ib, NULL, 256);
3824           if (r) {
3825                     DRM_ERROR("radeon: failed to get ib (%d).\n", r);
3826                     radeon_scratch_free(rdev, scratch);
3827                     return r;
3828           }
3829           ib.ptr[0] = PACKET3(PACKET3_SET_UCONFIG_REG, 1);
3830           ib.ptr[1] = ((scratch - PACKET3_SET_UCONFIG_REG_START) >> 2);
3831           ib.ptr[2] = 0xDEADBEEF;
3832           ib.length_dw = 3;
3833           r = radeon_ib_schedule(rdev, &ib, NULL, false);
3834           if (r) {
3835                     radeon_scratch_free(rdev, scratch);
3836                     radeon_ib_free(rdev, &ib);
3837                     DRM_ERROR("radeon: failed to schedule ib (%d).\n", r);
3838                     return r;
3839           }
3840           r = radeon_fence_wait_timeout(ib.fence, false, usecs_to_jiffies(
3841                     RADEON_USEC_IB_TEST_TIMEOUT));
3842           if (r < 0) {
3843                     DRM_ERROR("radeon: fence wait failed (%d).\n", r);
3844                     radeon_scratch_free(rdev, scratch);
3845                     radeon_ib_free(rdev, &ib);
3846                     return r;
3847           } else if (r == 0) {
3848                     DRM_ERROR("radeon: fence wait timed out.\n");
3849                     radeon_scratch_free(rdev, scratch);
3850                     radeon_ib_free(rdev, &ib);
3851                     return -ETIMEDOUT;
3852           }
3853           r = 0;
3854           for (i = 0; i < rdev->usec_timeout; i++) {
3855                     tmp = RREG32(scratch);
3856                     if (tmp == 0xDEADBEEF)
3857                               break;
3858                     DRM_UDELAY(1);
3859           }
3860           if (i < rdev->usec_timeout) {
3861                     DRM_INFO("ib test on ring %d succeeded in %u usecs\n", ib.fence->ring, i);
3862           } else {
3863                     DRM_ERROR("radeon: ib test failed (scratch(0x%04X)=0x%08X)\n",
3864                                 scratch, tmp);
3865                     r = -EINVAL;
3866           }
3867           radeon_scratch_free(rdev, scratch);
3868           radeon_ib_free(rdev, &ib);
3869           return r;
3870 }
3871 
3872 /*
3873  * CP.
3874  * On CIK, gfx and compute now have independent command processors.
3875  *
3876  * GFX
3877  * Gfx consists of a single ring and can process both gfx jobs and
3878  * compute jobs.  The gfx CP consists of three microengines (ME):
3879  * PFP - Pre-Fetch Parser
3880  * ME - Micro Engine
3881  * CE - Constant Engine
3882  * The PFP and ME make up what is considered the Drawing Engine (DE).
3883  * The CE is an asynchronous engine used for updating buffer descriptors
3884  * used by the DE so that they can be loaded into cache in parallel
3885  * while the DE is processing state update packets.
3886  *
3887  * Compute
3888  * The compute CP consists of two microengines (ME):
3889  * MEC1 - Compute MicroEngine 1
3890  * MEC2 - Compute MicroEngine 2
3891  * Each MEC supports 4 compute pipes and each pipe supports 8 queues.
3892  * The queues are exposed to userspace and are programmed directly
3893  * by the compute runtime.
3894  */
3895 /**
3896  * cik_cp_gfx_enable - enable/disable the gfx CP MEs
3897  *
3898  * @rdev: radeon_device pointer
3899  * @enable: enable or disable the MEs
3900  *
3901  * Halts or unhalts the gfx MEs.
3902  */
cik_cp_gfx_enable(struct radeon_device * rdev,bool enable)3903 static void cik_cp_gfx_enable(struct radeon_device *rdev, bool enable)
3904 {
3905           if (enable)
3906                     WREG32(CP_ME_CNTL, 0);
3907           else {
3908                     if (rdev->asic->copy.copy_ring_index == RADEON_RING_TYPE_GFX_INDEX)
3909                               radeon_ttm_set_active_vram_size(rdev, rdev->mc.visible_vram_size);
3910                     WREG32(CP_ME_CNTL, (CP_ME_HALT | CP_PFP_HALT | CP_CE_HALT));
3911                     rdev->ring[RADEON_RING_TYPE_GFX_INDEX].ready = false;
3912           }
3913           udelay(50);
3914 }
3915 
3916 /**
3917  * cik_cp_gfx_load_microcode - load the gfx CP ME ucode
3918  *
3919  * @rdev: radeon_device pointer
3920  *
3921  * Loads the gfx PFP, ME, and CE ucode.
3922  * Returns 0 for success, -EINVAL if the ucode is not available.
3923  */
cik_cp_gfx_load_microcode(struct radeon_device * rdev)3924 static int cik_cp_gfx_load_microcode(struct radeon_device *rdev)
3925 {
3926           int i;
3927 
3928           if (!rdev->me_fw || !rdev->pfp_fw || !rdev->ce_fw)
3929                     return -EINVAL;
3930 
3931           cik_cp_gfx_enable(rdev, false);
3932 
3933           if (rdev->new_fw) {
3934                     const struct gfx_firmware_header_v1_0 *pfp_hdr =
3935                               (const struct gfx_firmware_header_v1_0 *)rdev->pfp_fw->data;
3936                     const struct gfx_firmware_header_v1_0 *ce_hdr =
3937                               (const struct gfx_firmware_header_v1_0 *)rdev->ce_fw->data;
3938                     const struct gfx_firmware_header_v1_0 *me_hdr =
3939                               (const struct gfx_firmware_header_v1_0 *)rdev->me_fw->data;
3940                     const __le32 *fw_data;
3941                     u32 fw_size;
3942 
3943                     radeon_ucode_print_gfx_hdr(&pfp_hdr->header);
3944                     radeon_ucode_print_gfx_hdr(&ce_hdr->header);
3945                     radeon_ucode_print_gfx_hdr(&me_hdr->header);
3946 
3947                     /* PFP */
3948                     fw_data = (const __le32 *)
3949                               (rdev->pfp_fw->data + le32_to_cpu(pfp_hdr->header.ucode_array_offset_bytes));
3950                     fw_size = le32_to_cpu(pfp_hdr->header.ucode_size_bytes) / 4;
3951                     WREG32(CP_PFP_UCODE_ADDR, 0);
3952                     for (i = 0; i < fw_size; i++)
3953                               WREG32(CP_PFP_UCODE_DATA, le32_to_cpup(fw_data++));
3954                     WREG32(CP_PFP_UCODE_ADDR, le32_to_cpu(pfp_hdr->header.ucode_version));
3955 
3956                     /* CE */
3957                     fw_data = (const __le32 *)
3958                               (rdev->ce_fw->data + le32_to_cpu(ce_hdr->header.ucode_array_offset_bytes));
3959                     fw_size = le32_to_cpu(ce_hdr->header.ucode_size_bytes) / 4;
3960                     WREG32(CP_CE_UCODE_ADDR, 0);
3961                     for (i = 0; i < fw_size; i++)
3962                               WREG32(CP_CE_UCODE_DATA, le32_to_cpup(fw_data++));
3963                     WREG32(CP_CE_UCODE_ADDR, le32_to_cpu(ce_hdr->header.ucode_version));
3964 
3965                     /* ME */
3966                     fw_data = (const __be32 *)
3967                               (rdev->me_fw->data + le32_to_cpu(me_hdr->header.ucode_array_offset_bytes));
3968                     fw_size = le32_to_cpu(me_hdr->header.ucode_size_bytes) / 4;
3969                     WREG32(CP_ME_RAM_WADDR, 0);
3970                     for (i = 0; i < fw_size; i++)
3971                               WREG32(CP_ME_RAM_DATA, le32_to_cpup(fw_data++));
3972                     WREG32(CP_ME_RAM_WADDR, le32_to_cpu(me_hdr->header.ucode_version));
3973                     WREG32(CP_ME_RAM_RADDR, le32_to_cpu(me_hdr->header.ucode_version));
3974           } else {
3975                     const __be32 *fw_data;
3976 
3977                     /* PFP */
3978                     fw_data = (const __be32 *)rdev->pfp_fw->data;
3979                     WREG32(CP_PFP_UCODE_ADDR, 0);
3980                     for (i = 0; i < CIK_PFP_UCODE_SIZE; i++)
3981                               WREG32(CP_PFP_UCODE_DATA, be32_to_cpup(fw_data++));
3982                     WREG32(CP_PFP_UCODE_ADDR, 0);
3983 
3984                     /* CE */
3985                     fw_data = (const __be32 *)rdev->ce_fw->data;
3986                     WREG32(CP_CE_UCODE_ADDR, 0);
3987                     for (i = 0; i < CIK_CE_UCODE_SIZE; i++)
3988                               WREG32(CP_CE_UCODE_DATA, be32_to_cpup(fw_data++));
3989                     WREG32(CP_CE_UCODE_ADDR, 0);
3990 
3991                     /* ME */
3992                     fw_data = (const __be32 *)rdev->me_fw->data;
3993                     WREG32(CP_ME_RAM_WADDR, 0);
3994                     for (i = 0; i < CIK_ME_UCODE_SIZE; i++)
3995                               WREG32(CP_ME_RAM_DATA, be32_to_cpup(fw_data++));
3996                     WREG32(CP_ME_RAM_WADDR, 0);
3997           }
3998 
3999           return 0;
4000 }
4001 
4002 /**
4003  * cik_cp_gfx_start - start the gfx ring
4004  *
4005  * @rdev: radeon_device pointer
4006  *
4007  * Enables the ring and loads the clear state context and other
4008  * packets required to init the ring.
4009  * Returns 0 for success, error for failure.
4010  */
cik_cp_gfx_start(struct radeon_device * rdev)4011 static int cik_cp_gfx_start(struct radeon_device *rdev)
4012 {
4013           struct radeon_ring *ring = &rdev->ring[RADEON_RING_TYPE_GFX_INDEX];
4014           int r, i;
4015 
4016           /* init the CP */
4017           WREG32(CP_MAX_CONTEXT, rdev->config.cik.max_hw_contexts - 1);
4018           WREG32(CP_ENDIAN_SWAP, 0);
4019           WREG32(CP_DEVICE_ID, 1);
4020 
4021           cik_cp_gfx_enable(rdev, true);
4022 
4023           r = radeon_ring_lock(rdev, ring, cik_default_size + 17);
4024           if (r) {
4025                     DRM_ERROR("radeon: cp failed to lock ring (%d).\n", r);
4026                     return r;
4027           }
4028 
4029           /* init the CE partitions.  CE only used for gfx on CIK */
4030           radeon_ring_write(ring, PACKET3(PACKET3_SET_BASE, 2));
4031           radeon_ring_write(ring, PACKET3_BASE_INDEX(CE_PARTITION_BASE));
4032           radeon_ring_write(ring, 0x8000);
4033           radeon_ring_write(ring, 0x8000);
4034 
4035           /* setup clear context state */
4036           radeon_ring_write(ring, PACKET3(PACKET3_PREAMBLE_CNTL, 0));
4037           radeon_ring_write(ring, PACKET3_PREAMBLE_BEGIN_CLEAR_STATE);
4038 
4039           radeon_ring_write(ring, PACKET3(PACKET3_CONTEXT_CONTROL, 1));
4040           radeon_ring_write(ring, 0x80000000);
4041           radeon_ring_write(ring, 0x80000000);
4042 
4043           for (i = 0; i < cik_default_size; i++)
4044                     radeon_ring_write(ring, cik_default_state[i]);
4045 
4046           radeon_ring_write(ring, PACKET3(PACKET3_PREAMBLE_CNTL, 0));
4047           radeon_ring_write(ring, PACKET3_PREAMBLE_END_CLEAR_STATE);
4048 
4049           /* set clear context state */
4050           radeon_ring_write(ring, PACKET3(PACKET3_CLEAR_STATE, 0));
4051           radeon_ring_write(ring, 0);
4052 
4053           radeon_ring_write(ring, PACKET3(PACKET3_SET_CONTEXT_REG, 2));
4054           radeon_ring_write(ring, 0x00000316);
4055           radeon_ring_write(ring, 0x0000000e); /* VGT_VERTEX_REUSE_BLOCK_CNTL */
4056           radeon_ring_write(ring, 0x00000010); /* VGT_OUT_DEALLOC_CNTL */
4057 
4058           radeon_ring_unlock_commit(rdev, ring, false);
4059 
4060           return 0;
4061 }
4062 
4063 /**
4064  * cik_cp_gfx_fini - stop the gfx ring
4065  *
4066  * @rdev: radeon_device pointer
4067  *
4068  * Stop the gfx ring and tear down the driver ring
4069  * info.
4070  */
cik_cp_gfx_fini(struct radeon_device * rdev)4071 static void cik_cp_gfx_fini(struct radeon_device *rdev)
4072 {
4073           cik_cp_gfx_enable(rdev, false);
4074           radeon_ring_fini(rdev, &rdev->ring[RADEON_RING_TYPE_GFX_INDEX]);
4075 }
4076 
4077 /**
4078  * cik_cp_gfx_resume - setup the gfx ring buffer registers
4079  *
4080  * @rdev: radeon_device pointer
4081  *
4082  * Program the location and size of the gfx ring buffer
4083  * and test it to make sure it's working.
4084  * Returns 0 for success, error for failure.
4085  */
cik_cp_gfx_resume(struct radeon_device * rdev)4086 static int cik_cp_gfx_resume(struct radeon_device *rdev)
4087 {
4088           struct radeon_ring *ring;
4089           u32 tmp;
4090           u32 rb_bufsz;
4091           u64 rb_addr;
4092           int r;
4093 
4094           WREG32(CP_SEM_WAIT_TIMER, 0x0);
4095           if (rdev->family != CHIP_HAWAII)
4096                     WREG32(CP_SEM_INCOMPLETE_TIMER_CNTL, 0x0);
4097 
4098           /* Set the write pointer delay */
4099           WREG32(CP_RB_WPTR_DELAY, 0);
4100 
4101           /* set the RB to use vmid 0 */
4102           WREG32(CP_RB_VMID, 0);
4103 
4104           WREG32(SCRATCH_ADDR, ((rdev->wb.gpu_addr + RADEON_WB_SCRATCH_OFFSET) >> 8) & 0xFFFFFFFF);
4105 
4106           /* ring 0 - compute and gfx */
4107           /* Set ring buffer size */
4108           ring = &rdev->ring[RADEON_RING_TYPE_GFX_INDEX];
4109           rb_bufsz = order_base_2(ring->ring_size / 8);
4110           tmp = (order_base_2(RADEON_GPU_PAGE_SIZE/8) << 8) | rb_bufsz;
4111 #ifdef __BIG_ENDIAN
4112           tmp |= BUF_SWAP_32BIT;
4113 #endif
4114           WREG32(CP_RB0_CNTL, tmp);
4115 
4116           /* Initialize the ring buffer's read and write pointers */
4117           WREG32(CP_RB0_CNTL, tmp | RB_RPTR_WR_ENA);
4118           ring->wptr = 0;
4119           WREG32(CP_RB0_WPTR, ring->wptr);
4120 
4121           /* set the wb address wether it's enabled or not */
4122           WREG32(CP_RB0_RPTR_ADDR, (rdev->wb.gpu_addr + RADEON_WB_CP_RPTR_OFFSET) & 0xFFFFFFFC);
4123           WREG32(CP_RB0_RPTR_ADDR_HI, upper_32_bits(rdev->wb.gpu_addr + RADEON_WB_CP_RPTR_OFFSET) & 0xFF);
4124 
4125           /* scratch register shadowing is no longer supported */
4126           WREG32(SCRATCH_UMSK, 0);
4127 
4128           if (!rdev->wb.enabled)
4129                     tmp |= RB_NO_UPDATE;
4130 
4131           mdelay(1);
4132           WREG32(CP_RB0_CNTL, tmp);
4133 
4134           rb_addr = ring->gpu_addr >> 8;
4135           WREG32(CP_RB0_BASE, rb_addr);
4136           WREG32(CP_RB0_BASE_HI, upper_32_bits(rb_addr));
4137 
4138           /* start the ring */
4139           cik_cp_gfx_start(rdev);
4140           rdev->ring[RADEON_RING_TYPE_GFX_INDEX].ready = true;
4141           r = radeon_ring_test(rdev, RADEON_RING_TYPE_GFX_INDEX, &rdev->ring[RADEON_RING_TYPE_GFX_INDEX]);
4142           if (r) {
4143                     rdev->ring[RADEON_RING_TYPE_GFX_INDEX].ready = false;
4144                     return r;
4145           }
4146 
4147           if (rdev->asic->copy.copy_ring_index == RADEON_RING_TYPE_GFX_INDEX)
4148                     radeon_ttm_set_active_vram_size(rdev, rdev->mc.real_vram_size);
4149 
4150           return 0;
4151 }
4152 
cik_gfx_get_rptr(struct radeon_device * rdev,struct radeon_ring * ring)4153 u32 cik_gfx_get_rptr(struct radeon_device *rdev,
4154                          struct radeon_ring *ring)
4155 {
4156           u32 rptr;
4157 
4158           if (rdev->wb.enabled)
4159                     rptr = rdev->wb.wb[ring->rptr_offs/4];
4160           else
4161                     rptr = RREG32(CP_RB0_RPTR);
4162 
4163           return rptr;
4164 }
4165 
cik_gfx_get_wptr(struct radeon_device * rdev,struct radeon_ring * ring)4166 u32 cik_gfx_get_wptr(struct radeon_device *rdev,
4167                          struct radeon_ring *ring)
4168 {
4169           return RREG32(CP_RB0_WPTR);
4170 }
4171 
cik_gfx_set_wptr(struct radeon_device * rdev,struct radeon_ring * ring)4172 void cik_gfx_set_wptr(struct radeon_device *rdev,
4173                           struct radeon_ring *ring)
4174 {
4175           WREG32(CP_RB0_WPTR, ring->wptr);
4176           (void)RREG32(CP_RB0_WPTR);
4177 }
4178 
cik_compute_get_rptr(struct radeon_device * rdev,struct radeon_ring * ring)4179 u32 cik_compute_get_rptr(struct radeon_device *rdev,
4180                                struct radeon_ring *ring)
4181 {
4182           u32 rptr;
4183 
4184           if (rdev->wb.enabled) {
4185                     rptr = rdev->wb.wb[ring->rptr_offs/4];
4186           } else {
4187                     mutex_lock(&rdev->srbm_mutex);
4188                     cik_srbm_select(rdev, ring->me, ring->pipe, ring->queue, 0);
4189                     rptr = RREG32(CP_HQD_PQ_RPTR);
4190                     cik_srbm_select(rdev, 0, 0, 0, 0);
4191                     mutex_unlock(&rdev->srbm_mutex);
4192           }
4193 
4194           return rptr;
4195 }
4196 
cik_compute_get_wptr(struct radeon_device * rdev,struct radeon_ring * ring)4197 u32 cik_compute_get_wptr(struct radeon_device *rdev,
4198                                struct radeon_ring *ring)
4199 {
4200           u32 wptr;
4201 
4202           if (rdev->wb.enabled) {
4203                     /* XXX check if swapping is necessary on BE */
4204                     wptr = rdev->wb.wb[ring->wptr_offs/4];
4205           } else {
4206                     mutex_lock(&rdev->srbm_mutex);
4207                     cik_srbm_select(rdev, ring->me, ring->pipe, ring->queue, 0);
4208                     wptr = RREG32(CP_HQD_PQ_WPTR);
4209                     cik_srbm_select(rdev, 0, 0, 0, 0);
4210                     mutex_unlock(&rdev->srbm_mutex);
4211           }
4212 
4213           return wptr;
4214 }
4215 
cik_compute_set_wptr(struct radeon_device * rdev,struct radeon_ring * ring)4216 void cik_compute_set_wptr(struct radeon_device *rdev,
4217                                 struct radeon_ring *ring)
4218 {
4219           /* XXX check if swapping is necessary on BE */
4220           rdev->wb.wb[ring->wptr_offs/4] = ring->wptr;
4221           WDOORBELL32(ring->doorbell_index, ring->wptr);
4222 }
4223 
cik_compute_stop(struct radeon_device * rdev,struct radeon_ring * ring)4224 static void cik_compute_stop(struct radeon_device *rdev,
4225                                    struct radeon_ring *ring)
4226 {
4227           u32 j, tmp;
4228 
4229           cik_srbm_select(rdev, ring->me, ring->pipe, ring->queue, 0);
4230           /* Disable wptr polling. */
4231           tmp = RREG32(CP_PQ_WPTR_POLL_CNTL);
4232           tmp &= ~WPTR_POLL_EN;
4233           WREG32(CP_PQ_WPTR_POLL_CNTL, tmp);
4234           /* Disable HQD. */
4235           if (RREG32(CP_HQD_ACTIVE) & 1) {
4236                     WREG32(CP_HQD_DEQUEUE_REQUEST, 1);
4237                     for (j = 0; j < rdev->usec_timeout; j++) {
4238                               if (!(RREG32(CP_HQD_ACTIVE) & 1))
4239                                         break;
4240                               udelay(1);
4241                     }
4242                     WREG32(CP_HQD_DEQUEUE_REQUEST, 0);
4243                     WREG32(CP_HQD_PQ_RPTR, 0);
4244                     WREG32(CP_HQD_PQ_WPTR, 0);
4245           }
4246           cik_srbm_select(rdev, 0, 0, 0, 0);
4247 }
4248 
4249 /**
4250  * cik_cp_compute_enable - enable/disable the compute CP MEs
4251  *
4252  * @rdev: radeon_device pointer
4253  * @enable: enable or disable the MEs
4254  *
4255  * Halts or unhalts the compute MEs.
4256  */
cik_cp_compute_enable(struct radeon_device * rdev,bool enable)4257 static void cik_cp_compute_enable(struct radeon_device *rdev, bool enable)
4258 {
4259           if (enable)
4260                     WREG32(CP_MEC_CNTL, 0);
4261           else {
4262                     /*
4263                      * To make hibernation reliable we need to clear compute ring
4264                      * configuration before halting the compute ring.
4265                      */
4266                     mutex_lock(&rdev->srbm_mutex);
4267                     cik_compute_stop(rdev,&rdev->ring[CAYMAN_RING_TYPE_CP1_INDEX]);
4268                     cik_compute_stop(rdev,&rdev->ring[CAYMAN_RING_TYPE_CP2_INDEX]);
4269                     mutex_unlock(&rdev->srbm_mutex);
4270 
4271                     WREG32(CP_MEC_CNTL, (MEC_ME1_HALT | MEC_ME2_HALT));
4272                     rdev->ring[CAYMAN_RING_TYPE_CP1_INDEX].ready = false;
4273                     rdev->ring[CAYMAN_RING_TYPE_CP2_INDEX].ready = false;
4274           }
4275           udelay(50);
4276 }
4277 
4278 /**
4279  * cik_cp_compute_load_microcode - load the compute CP ME ucode
4280  *
4281  * @rdev: radeon_device pointer
4282  *
4283  * Loads the compute MEC1&2 ucode.
4284  * Returns 0 for success, -EINVAL if the ucode is not available.
4285  */
cik_cp_compute_load_microcode(struct radeon_device * rdev)4286 static int cik_cp_compute_load_microcode(struct radeon_device *rdev)
4287 {
4288           int i;
4289 
4290           if (!rdev->mec_fw)
4291                     return -EINVAL;
4292 
4293           cik_cp_compute_enable(rdev, false);
4294 
4295           if (rdev->new_fw) {
4296                     const struct gfx_firmware_header_v1_0 *mec_hdr =
4297                               (const struct gfx_firmware_header_v1_0 *)rdev->mec_fw->data;
4298                     const __le32 *fw_data;
4299                     u32 fw_size;
4300 
4301                     radeon_ucode_print_gfx_hdr(&mec_hdr->header);
4302 
4303                     /* MEC1 */
4304                     fw_data = (const __le32 *)
4305                               (rdev->mec_fw->data + le32_to_cpu(mec_hdr->header.ucode_array_offset_bytes));
4306                     fw_size = le32_to_cpu(mec_hdr->header.ucode_size_bytes) / 4;
4307                     WREG32(CP_MEC_ME1_UCODE_ADDR, 0);
4308                     for (i = 0; i < fw_size; i++)
4309                               WREG32(CP_MEC_ME1_UCODE_DATA, le32_to_cpup(fw_data++));
4310                     WREG32(CP_MEC_ME1_UCODE_ADDR, le32_to_cpu(mec_hdr->header.ucode_version));
4311 
4312                     /* MEC2 */
4313                     if (rdev->family == CHIP_KAVERI) {
4314                               const struct gfx_firmware_header_v1_0 *mec2_hdr =
4315                                         (const struct gfx_firmware_header_v1_0 *)rdev->mec2_fw->data;
4316 
4317                               fw_data = (const __le32 *)
4318                                         (rdev->mec2_fw->data +
4319                                          le32_to_cpu(mec2_hdr->header.ucode_array_offset_bytes));
4320                               fw_size = le32_to_cpu(mec2_hdr->header.ucode_size_bytes) / 4;
4321                               WREG32(CP_MEC_ME2_UCODE_ADDR, 0);
4322                               for (i = 0; i < fw_size; i++)
4323                                         WREG32(CP_MEC_ME2_UCODE_DATA, le32_to_cpup(fw_data++));
4324                               WREG32(CP_MEC_ME2_UCODE_ADDR, le32_to_cpu(mec2_hdr->header.ucode_version));
4325                     }
4326           } else {
4327                     const __be32 *fw_data;
4328 
4329                     /* MEC1 */
4330                     fw_data = (const __be32 *)rdev->mec_fw->data;
4331                     WREG32(CP_MEC_ME1_UCODE_ADDR, 0);
4332                     for (i = 0; i < CIK_MEC_UCODE_SIZE; i++)
4333                               WREG32(CP_MEC_ME1_UCODE_DATA, be32_to_cpup(fw_data++));
4334                     WREG32(CP_MEC_ME1_UCODE_ADDR, 0);
4335 
4336                     if (rdev->family == CHIP_KAVERI) {
4337                               /* MEC2 */
4338                               fw_data = (const __be32 *)rdev->mec_fw->data;
4339                               WREG32(CP_MEC_ME2_UCODE_ADDR, 0);
4340                               for (i = 0; i < CIK_MEC_UCODE_SIZE; i++)
4341                                         WREG32(CP_MEC_ME2_UCODE_DATA, be32_to_cpup(fw_data++));
4342                               WREG32(CP_MEC_ME2_UCODE_ADDR, 0);
4343                     }
4344           }
4345 
4346           return 0;
4347 }
4348 
4349 /**
4350  * cik_cp_compute_start - start the compute queues
4351  *
4352  * @rdev: radeon_device pointer
4353  *
4354  * Enable the compute queues.
4355  * Returns 0 for success, error for failure.
4356  */
cik_cp_compute_start(struct radeon_device * rdev)4357 static int cik_cp_compute_start(struct radeon_device *rdev)
4358 {
4359           cik_cp_compute_enable(rdev, true);
4360 
4361           return 0;
4362 }
4363 
4364 /**
4365  * cik_cp_compute_fini - stop the compute queues
4366  *
4367  * @rdev: radeon_device pointer
4368  *
4369  * Stop the compute queues and tear down the driver queue
4370  * info.
4371  */
cik_cp_compute_fini(struct radeon_device * rdev)4372 static void cik_cp_compute_fini(struct radeon_device *rdev)
4373 {
4374           int i, idx, r;
4375 
4376           cik_cp_compute_enable(rdev, false);
4377 
4378           for (i = 0; i < 2; i++) {
4379                     if (i == 0)
4380                               idx = CAYMAN_RING_TYPE_CP1_INDEX;
4381                     else
4382                               idx = CAYMAN_RING_TYPE_CP2_INDEX;
4383 
4384                     if (rdev->ring[idx].mqd_obj) {
4385                               r = radeon_bo_reserve(rdev->ring[idx].mqd_obj, false);
4386                               if (unlikely(r != 0))
4387                                         dev_warn(rdev->dev, "(%d) reserve MQD bo failed\n", r);
4388 
4389                               radeon_bo_unpin(rdev->ring[idx].mqd_obj);
4390                               radeon_bo_unreserve(rdev->ring[idx].mqd_obj);
4391 
4392                               radeon_bo_unref(&rdev->ring[idx].mqd_obj);
4393                               rdev->ring[idx].mqd_obj = NULL;
4394                     }
4395           }
4396 }
4397 
cik_mec_fini(struct radeon_device * rdev)4398 static void cik_mec_fini(struct radeon_device *rdev)
4399 {
4400           int r;
4401 
4402           if (rdev->mec.hpd_eop_obj) {
4403                     r = radeon_bo_reserve(rdev->mec.hpd_eop_obj, false);
4404                     if (unlikely(r != 0))
4405                               dev_warn(rdev->dev, "(%d) reserve HPD EOP bo failed\n", r);
4406                     radeon_bo_unpin(rdev->mec.hpd_eop_obj);
4407                     radeon_bo_unreserve(rdev->mec.hpd_eop_obj);
4408 
4409                     radeon_bo_unref(&rdev->mec.hpd_eop_obj);
4410                     rdev->mec.hpd_eop_obj = NULL;
4411           }
4412 }
4413 
4414 #define MEC_HPD_SIZE 2048
4415 
cik_mec_init(struct radeon_device * rdev)4416 static int cik_mec_init(struct radeon_device *rdev)
4417 {
4418           int r;
4419           u32 *hpd;
4420 
4421           /*
4422            * KV:    2 MEC, 4 Pipes/MEC, 8 Queues/Pipe - 64 Queues total
4423            * CI/KB: 1 MEC, 4 Pipes/MEC, 8 Queues/Pipe - 32 Queues total
4424            * Nonetheless, we assign only 1 pipe because all other pipes will
4425            * be handled by KFD
4426            */
4427           rdev->mec.num_mec = 1;
4428           rdev->mec.num_pipe = 1;
4429           rdev->mec.num_queue = rdev->mec.num_mec * rdev->mec.num_pipe * 8;
4430 
4431           if (rdev->mec.hpd_eop_obj == NULL) {
4432                     r = radeon_bo_create(rdev,
4433                                              rdev->mec.num_mec *rdev->mec.num_pipe * MEC_HPD_SIZE * 2,
4434                                              PAGE_SIZE, true,
4435                                              RADEON_GEM_DOMAIN_GTT, 0, NULL, NULL,
4436                                              &rdev->mec.hpd_eop_obj);
4437                     if (r) {
4438                               dev_warn(rdev->dev, "(%d) create HDP EOP bo failed\n", r);
4439                               return r;
4440                     }
4441           }
4442 
4443           r = radeon_bo_reserve(rdev->mec.hpd_eop_obj, false);
4444           if (unlikely(r != 0)) {
4445                     cik_mec_fini(rdev);
4446                     return r;
4447           }
4448           r = radeon_bo_pin(rdev->mec.hpd_eop_obj, RADEON_GEM_DOMAIN_GTT,
4449                                 &rdev->mec.hpd_eop_gpu_addr);
4450           if (r) {
4451                     dev_warn(rdev->dev, "(%d) pin HDP EOP bo failed\n", r);
4452                     cik_mec_fini(rdev);
4453                     return r;
4454           }
4455           r = radeon_bo_kmap(rdev->mec.hpd_eop_obj, (void **)&hpd);
4456           if (r) {
4457                     dev_warn(rdev->dev, "(%d) map HDP EOP bo failed\n", r);
4458                     cik_mec_fini(rdev);
4459                     return r;
4460           }
4461 
4462           /* clear memory.  Not sure if this is required or not */
4463           memset(hpd, 0, rdev->mec.num_mec *rdev->mec.num_pipe * MEC_HPD_SIZE * 2);
4464 
4465           radeon_bo_kunmap(rdev->mec.hpd_eop_obj);
4466           radeon_bo_unreserve(rdev->mec.hpd_eop_obj);
4467 
4468           return 0;
4469 }
4470 
4471 struct hqd_registers
4472 {
4473           u32 cp_mqd_base_addr;
4474           u32 cp_mqd_base_addr_hi;
4475           u32 cp_hqd_active;
4476           u32 cp_hqd_vmid;
4477           u32 cp_hqd_persistent_state;
4478           u32 cp_hqd_pipe_priority;
4479           u32 cp_hqd_queue_priority;
4480           u32 cp_hqd_quantum;
4481           u32 cp_hqd_pq_base;
4482           u32 cp_hqd_pq_base_hi;
4483           u32 cp_hqd_pq_rptr;
4484           u32 cp_hqd_pq_rptr_report_addr;
4485           u32 cp_hqd_pq_rptr_report_addr_hi;
4486           u32 cp_hqd_pq_wptr_poll_addr;
4487           u32 cp_hqd_pq_wptr_poll_addr_hi;
4488           u32 cp_hqd_pq_doorbell_control;
4489           u32 cp_hqd_pq_wptr;
4490           u32 cp_hqd_pq_control;
4491           u32 cp_hqd_ib_base_addr;
4492           u32 cp_hqd_ib_base_addr_hi;
4493           u32 cp_hqd_ib_rptr;
4494           u32 cp_hqd_ib_control;
4495           u32 cp_hqd_iq_timer;
4496           u32 cp_hqd_iq_rptr;
4497           u32 cp_hqd_dequeue_request;
4498           u32 cp_hqd_dma_offload;
4499           u32 cp_hqd_sema_cmd;
4500           u32 cp_hqd_msg_type;
4501           u32 cp_hqd_atomic0_preop_lo;
4502           u32 cp_hqd_atomic0_preop_hi;
4503           u32 cp_hqd_atomic1_preop_lo;
4504           u32 cp_hqd_atomic1_preop_hi;
4505           u32 cp_hqd_hq_scheduler0;
4506           u32 cp_hqd_hq_scheduler1;
4507           u32 cp_mqd_control;
4508 };
4509 
4510 struct bonaire_mqd
4511 {
4512           u32 header;
4513           u32 dispatch_initiator;
4514           u32 dimensions[3];
4515           u32 start_idx[3];
4516           u32 num_threads[3];
4517           u32 pipeline_stat_enable;
4518           u32 perf_counter_enable;
4519           u32 pgm[2];
4520           u32 tba[2];
4521           u32 tma[2];
4522           u32 pgm_rsrc[2];
4523           u32 vmid;
4524           u32 resource_limits;
4525           u32 static_thread_mgmt01[2];
4526           u32 tmp_ring_size;
4527           u32 static_thread_mgmt23[2];
4528           u32 restart[3];
4529           u32 thread_trace_enable;
4530           u32 reserved1;
4531           u32 user_data[16];
4532           u32 vgtcs_invoke_count[2];
4533           struct hqd_registers queue_state;
4534           u32 dequeue_cntr;
4535           u32 interrupt_queue[64];
4536 };
4537 
4538 /**
4539  * cik_cp_compute_resume - setup the compute queue registers
4540  *
4541  * @rdev: radeon_device pointer
4542  *
4543  * Program the compute queues and test them to make sure they
4544  * are working.
4545  * Returns 0 for success, error for failure.
4546  */
cik_cp_compute_resume(struct radeon_device * rdev)4547 static int cik_cp_compute_resume(struct radeon_device *rdev)
4548 {
4549           int r, i, j, idx;
4550           u32 tmp;
4551           bool use_doorbell = true;
4552           u64 hqd_gpu_addr;
4553           u64 mqd_gpu_addr;
4554           u64 eop_gpu_addr;
4555           u64 wb_gpu_addr;
4556           u32 *buf;
4557           struct bonaire_mqd *mqd;
4558 
4559           r = cik_cp_compute_start(rdev);
4560           if (r)
4561                     return r;
4562 
4563           /* fix up chicken bits */
4564           tmp = RREG32(CP_CPF_DEBUG);
4565           tmp |= (1 << 23);
4566           WREG32(CP_CPF_DEBUG, tmp);
4567 
4568           /* init the pipes */
4569           mutex_lock(&rdev->srbm_mutex);
4570 
4571           for (i = 0; i < rdev->mec.num_pipe; ++i) {
4572                     cik_srbm_select(rdev, 0, i, 0, 0);
4573 
4574                     eop_gpu_addr = rdev->mec.hpd_eop_gpu_addr + (i * MEC_HPD_SIZE * 2) ;
4575                     /* write the EOP addr */
4576                     WREG32(CP_HPD_EOP_BASE_ADDR, eop_gpu_addr >> 8);
4577                     WREG32(CP_HPD_EOP_BASE_ADDR_HI, upper_32_bits(eop_gpu_addr) >> 8);
4578 
4579                     /* set the VMID assigned */
4580                     WREG32(CP_HPD_EOP_VMID, 0);
4581 
4582                     /* set the EOP size, register value is 2^(EOP_SIZE+1) dwords */
4583                     tmp = RREG32(CP_HPD_EOP_CONTROL);
4584                     tmp &= ~EOP_SIZE_MASK;
4585                     tmp |= order_base_2(MEC_HPD_SIZE / 8);
4586                     WREG32(CP_HPD_EOP_CONTROL, tmp);
4587 
4588           }
4589           mutex_unlock(&rdev->srbm_mutex);
4590 
4591           /* init the queues.  Just two for now. */
4592           for (i = 0; i < 2; i++) {
4593                     if (i == 0)
4594                               idx = CAYMAN_RING_TYPE_CP1_INDEX;
4595                     else
4596                               idx = CAYMAN_RING_TYPE_CP2_INDEX;
4597 
4598                     if (rdev->ring[idx].mqd_obj == NULL) {
4599                               r = radeon_bo_create(rdev,
4600                                                        sizeof(struct bonaire_mqd),
4601                                                        PAGE_SIZE, true,
4602                                                        RADEON_GEM_DOMAIN_GTT, 0, NULL,
4603                                                        NULL, &rdev->ring[idx].mqd_obj);
4604                               if (r) {
4605                                         dev_warn(rdev->dev, "(%d) create MQD bo failed\n", r);
4606                                         return r;
4607                               }
4608                     }
4609 
4610                     r = radeon_bo_reserve(rdev->ring[idx].mqd_obj, false);
4611                     if (unlikely(r != 0)) {
4612                               cik_cp_compute_fini(rdev);
4613                               return r;
4614                     }
4615                     r = radeon_bo_pin(rdev->ring[idx].mqd_obj, RADEON_GEM_DOMAIN_GTT,
4616                                           &mqd_gpu_addr);
4617                     if (r) {
4618                               dev_warn(rdev->dev, "(%d) pin MQD bo failed\n", r);
4619                               cik_cp_compute_fini(rdev);
4620                               return r;
4621                     }
4622                     r = radeon_bo_kmap(rdev->ring[idx].mqd_obj, (void **)&buf);
4623                     if (r) {
4624                               dev_warn(rdev->dev, "(%d) map MQD bo failed\n", r);
4625                               cik_cp_compute_fini(rdev);
4626                               return r;
4627                     }
4628 
4629                     /* init the mqd struct */
4630                     memset(buf, 0, sizeof(struct bonaire_mqd));
4631 
4632                     mqd = (struct bonaire_mqd *)buf;
4633                     mqd->header = 0xC0310800;
4634                     mqd->static_thread_mgmt01[0] = 0xffffffff;
4635                     mqd->static_thread_mgmt01[1] = 0xffffffff;
4636                     mqd->static_thread_mgmt23[0] = 0xffffffff;
4637                     mqd->static_thread_mgmt23[1] = 0xffffffff;
4638 
4639                     mutex_lock(&rdev->srbm_mutex);
4640                     cik_srbm_select(rdev, rdev->ring[idx].me,
4641                                         rdev->ring[idx].pipe,
4642                                         rdev->ring[idx].queue, 0);
4643 
4644                     /* disable wptr polling */
4645                     tmp = RREG32(CP_PQ_WPTR_POLL_CNTL);
4646                     tmp &= ~WPTR_POLL_EN;
4647                     WREG32(CP_PQ_WPTR_POLL_CNTL, tmp);
4648 
4649                     /* enable doorbell? */
4650                     mqd->queue_state.cp_hqd_pq_doorbell_control =
4651                               RREG32(CP_HQD_PQ_DOORBELL_CONTROL);
4652                     if (use_doorbell)
4653                               mqd->queue_state.cp_hqd_pq_doorbell_control |= DOORBELL_EN;
4654                     else
4655                               mqd->queue_state.cp_hqd_pq_doorbell_control &= ~DOORBELL_EN;
4656                     WREG32(CP_HQD_PQ_DOORBELL_CONTROL,
4657                            mqd->queue_state.cp_hqd_pq_doorbell_control);
4658 
4659                     /* disable the queue if it's active */
4660                     mqd->queue_state.cp_hqd_dequeue_request = 0;
4661                     mqd->queue_state.cp_hqd_pq_rptr = 0;
4662                     mqd->queue_state.cp_hqd_pq_wptr= 0;
4663                     if (RREG32(CP_HQD_ACTIVE) & 1) {
4664                               WREG32(CP_HQD_DEQUEUE_REQUEST, 1);
4665                               for (j = 0; j < rdev->usec_timeout; j++) {
4666                                         if (!(RREG32(CP_HQD_ACTIVE) & 1))
4667                                                   break;
4668                                         udelay(1);
4669                               }
4670                               WREG32(CP_HQD_DEQUEUE_REQUEST, mqd->queue_state.cp_hqd_dequeue_request);
4671                               WREG32(CP_HQD_PQ_RPTR, mqd->queue_state.cp_hqd_pq_rptr);
4672                               WREG32(CP_HQD_PQ_WPTR, mqd->queue_state.cp_hqd_pq_wptr);
4673                     }
4674 
4675                     /* set the pointer to the MQD */
4676                     mqd->queue_state.cp_mqd_base_addr = mqd_gpu_addr & 0xfffffffc;
4677                     mqd->queue_state.cp_mqd_base_addr_hi = upper_32_bits(mqd_gpu_addr);
4678                     WREG32(CP_MQD_BASE_ADDR, mqd->queue_state.cp_mqd_base_addr);
4679                     WREG32(CP_MQD_BASE_ADDR_HI, mqd->queue_state.cp_mqd_base_addr_hi);
4680                     /* set MQD vmid to 0 */
4681                     mqd->queue_state.cp_mqd_control = RREG32(CP_MQD_CONTROL);
4682                     mqd->queue_state.cp_mqd_control &= ~MQD_VMID_MASK;
4683                     WREG32(CP_MQD_CONTROL, mqd->queue_state.cp_mqd_control);
4684 
4685                     /* set the pointer to the HQD, this is similar CP_RB0_BASE/_HI */
4686                     hqd_gpu_addr = rdev->ring[idx].gpu_addr >> 8;
4687                     mqd->queue_state.cp_hqd_pq_base = hqd_gpu_addr;
4688                     mqd->queue_state.cp_hqd_pq_base_hi = upper_32_bits(hqd_gpu_addr);
4689                     WREG32(CP_HQD_PQ_BASE, mqd->queue_state.cp_hqd_pq_base);
4690                     WREG32(CP_HQD_PQ_BASE_HI, mqd->queue_state.cp_hqd_pq_base_hi);
4691 
4692                     /* set up the HQD, this is similar to CP_RB0_CNTL */
4693                     mqd->queue_state.cp_hqd_pq_control = RREG32(CP_HQD_PQ_CONTROL);
4694                     mqd->queue_state.cp_hqd_pq_control &=
4695                               ~(QUEUE_SIZE_MASK | RPTR_BLOCK_SIZE_MASK);
4696 
4697                     mqd->queue_state.cp_hqd_pq_control |=
4698                               order_base_2(rdev->ring[idx].ring_size / 8);
4699                     mqd->queue_state.cp_hqd_pq_control |=
4700                               (order_base_2(RADEON_GPU_PAGE_SIZE/8) << 8);
4701 #ifdef __BIG_ENDIAN
4702                     mqd->queue_state.cp_hqd_pq_control |= BUF_SWAP_32BIT;
4703 #endif
4704                     mqd->queue_state.cp_hqd_pq_control &=
4705                               ~(UNORD_DISPATCH | ROQ_PQ_IB_FLIP | PQ_VOLATILE);
4706                     mqd->queue_state.cp_hqd_pq_control |=
4707                               PRIV_STATE | KMD_QUEUE; /* assuming kernel queue control */
4708                     WREG32(CP_HQD_PQ_CONTROL, mqd->queue_state.cp_hqd_pq_control);
4709 
4710                     /* only used if CP_PQ_WPTR_POLL_CNTL.WPTR_POLL_EN=1 */
4711                     if (i == 0)
4712                               wb_gpu_addr = rdev->wb.gpu_addr + CIK_WB_CP1_WPTR_OFFSET;
4713                     else
4714                               wb_gpu_addr = rdev->wb.gpu_addr + CIK_WB_CP2_WPTR_OFFSET;
4715                     mqd->queue_state.cp_hqd_pq_wptr_poll_addr = wb_gpu_addr & 0xfffffffc;
4716                     mqd->queue_state.cp_hqd_pq_wptr_poll_addr_hi = upper_32_bits(wb_gpu_addr) & 0xffff;
4717                     WREG32(CP_HQD_PQ_WPTR_POLL_ADDR, mqd->queue_state.cp_hqd_pq_wptr_poll_addr);
4718                     WREG32(CP_HQD_PQ_WPTR_POLL_ADDR_HI,
4719                            mqd->queue_state.cp_hqd_pq_wptr_poll_addr_hi);
4720 
4721                     /* set the wb address wether it's enabled or not */
4722                     if (i == 0)
4723                               wb_gpu_addr = rdev->wb.gpu_addr + RADEON_WB_CP1_RPTR_OFFSET;
4724                     else
4725                               wb_gpu_addr = rdev->wb.gpu_addr + RADEON_WB_CP2_RPTR_OFFSET;
4726                     mqd->queue_state.cp_hqd_pq_rptr_report_addr = wb_gpu_addr & 0xfffffffc;
4727                     mqd->queue_state.cp_hqd_pq_rptr_report_addr_hi =
4728                               upper_32_bits(wb_gpu_addr) & 0xffff;
4729                     WREG32(CP_HQD_PQ_RPTR_REPORT_ADDR,
4730                            mqd->queue_state.cp_hqd_pq_rptr_report_addr);
4731                     WREG32(CP_HQD_PQ_RPTR_REPORT_ADDR_HI,
4732                            mqd->queue_state.cp_hqd_pq_rptr_report_addr_hi);
4733 
4734                     /* enable the doorbell if requested */
4735                     if (use_doorbell) {
4736                               mqd->queue_state.cp_hqd_pq_doorbell_control =
4737                                         RREG32(CP_HQD_PQ_DOORBELL_CONTROL);
4738                               mqd->queue_state.cp_hqd_pq_doorbell_control &= ~DOORBELL_OFFSET_MASK;
4739                               mqd->queue_state.cp_hqd_pq_doorbell_control |=
4740                                         DOORBELL_OFFSET(rdev->ring[idx].doorbell_index);
4741                               mqd->queue_state.cp_hqd_pq_doorbell_control |= DOORBELL_EN;
4742                               mqd->queue_state.cp_hqd_pq_doorbell_control &=
4743                                         ~(DOORBELL_SOURCE | DOORBELL_HIT);
4744 
4745                     } else {
4746                               mqd->queue_state.cp_hqd_pq_doorbell_control = 0;
4747                     }
4748                     WREG32(CP_HQD_PQ_DOORBELL_CONTROL,
4749                            mqd->queue_state.cp_hqd_pq_doorbell_control);
4750 
4751                     /* read and write pointers, similar to CP_RB0_WPTR/_RPTR */
4752                     rdev->ring[idx].wptr = 0;
4753                     mqd->queue_state.cp_hqd_pq_wptr = rdev->ring[idx].wptr;
4754                     WREG32(CP_HQD_PQ_WPTR, mqd->queue_state.cp_hqd_pq_wptr);
4755                     mqd->queue_state.cp_hqd_pq_rptr = RREG32(CP_HQD_PQ_RPTR);
4756 
4757                     /* set the vmid for the queue */
4758                     mqd->queue_state.cp_hqd_vmid = 0;
4759                     WREG32(CP_HQD_VMID, mqd->queue_state.cp_hqd_vmid);
4760 
4761                     /* activate the queue */
4762                     mqd->queue_state.cp_hqd_active = 1;
4763                     WREG32(CP_HQD_ACTIVE, mqd->queue_state.cp_hqd_active);
4764 
4765                     cik_srbm_select(rdev, 0, 0, 0, 0);
4766                     mutex_unlock(&rdev->srbm_mutex);
4767 
4768                     radeon_bo_kunmap(rdev->ring[idx].mqd_obj);
4769                     radeon_bo_unreserve(rdev->ring[idx].mqd_obj);
4770 
4771                     rdev->ring[idx].ready = true;
4772                     r = radeon_ring_test(rdev, idx, &rdev->ring[idx]);
4773                     if (r)
4774                               rdev->ring[idx].ready = false;
4775           }
4776 
4777           return 0;
4778 }
4779 
/*
 * cik_cp_enable - switch both command processors on or off
 *
 * Forwards @enable to the gfx CP first and then to the compute CP.
 */
static void cik_cp_enable(struct radeon_device *rdev, bool enable)
{
	/* gfx first, compute second */
	cik_cp_gfx_enable(rdev, enable);
	cik_cp_compute_enable(rdev, enable);
}
4785 
/*
 * cik_cp_load_microcode - load the gfx and compute CP microcode
 *
 * Loads the gfx microcode first; the compute microcode is only attempted
 * when that succeeded.
 * Returns 0 on success or the first non-zero status reported by a loader.
 */
static int cik_cp_load_microcode(struct radeon_device *rdev)
{
	int ret = cik_cp_gfx_load_microcode(rdev);

	if (ret != 0)
		return ret;

	/* the compute loader's status (0 or error) is the final result */
	return cik_cp_compute_load_microcode(rdev);
}
4799 
/*
 * cik_cp_fini - tear down both command processors
 *
 * Runs the gfx CP teardown followed by the compute CP teardown.
 */
static void cik_cp_fini(struct radeon_device *rdev)
{
	/* gfx first, compute second */
	cik_cp_gfx_fini(rdev);
	cik_cp_compute_fini(rdev);
}
4805 
cik_cp_resume(struct radeon_device * rdev)4806 static int cik_cp_resume(struct radeon_device *rdev)
4807 {
4808           int r;
4809 
4810           cik_enable_gui_idle_interrupt(rdev, false);
4811 
4812           r = cik_cp_load_microcode(rdev);
4813           if (r)
4814                     return r;
4815 
4816           r = cik_cp_gfx_resume(rdev);
4817           if (r)
4818                     return r;
4819           r = cik_cp_compute_resume(rdev);
4820           if (r)
4821                     return r;
4822 
4823           cik_enable_gui_idle_interrupt(rdev, true);
4824 
4825           return 0;
4826 }
4827 
cik_print_gpu_status_regs(struct radeon_device * rdev)4828 static void cik_print_gpu_status_regs(struct radeon_device *rdev)
4829 {
4830           dev_info(rdev->dev, "  GRBM_STATUS=0x%08X\n",
4831                     RREG32(GRBM_STATUS));
4832           dev_info(rdev->dev, "  GRBM_STATUS2=0x%08X\n",
4833                     RREG32(GRBM_STATUS2));
4834           dev_info(rdev->dev, "  GRBM_STATUS_SE0=0x%08X\n",
4835                     RREG32(GRBM_STATUS_SE0));
4836           dev_info(rdev->dev, "  GRBM_STATUS_SE1=0x%08X\n",
4837                     RREG32(GRBM_STATUS_SE1));
4838           dev_info(rdev->dev, "  GRBM_STATUS_SE2=0x%08X\n",
4839                     RREG32(GRBM_STATUS_SE2));
4840           dev_info(rdev->dev, "  GRBM_STATUS_SE3=0x%08X\n",
4841                     RREG32(GRBM_STATUS_SE3));
4842           dev_info(rdev->dev, "  SRBM_STATUS=0x%08X\n",
4843                     RREG32(SRBM_STATUS));
4844           dev_info(rdev->dev, "  SRBM_STATUS2=0x%08X\n",
4845                     RREG32(SRBM_STATUS2));
4846           dev_info(rdev->dev, "  SDMA0_STATUS_REG   = 0x%08X\n",
4847                     RREG32(SDMA0_STATUS_REG + SDMA0_REGISTER_OFFSET));
4848           dev_info(rdev->dev, "  SDMA1_STATUS_REG   = 0x%08X\n",
4849                      RREG32(SDMA0_STATUS_REG + SDMA1_REGISTER_OFFSET));
4850           dev_info(rdev->dev, "  CP_STAT = 0x%08x\n", RREG32(CP_STAT));
4851           dev_info(rdev->dev, "  CP_STALLED_STAT1 = 0x%08x\n",
4852                      RREG32(CP_STALLED_STAT1));
4853           dev_info(rdev->dev, "  CP_STALLED_STAT2 = 0x%08x\n",
4854                      RREG32(CP_STALLED_STAT2));
4855           dev_info(rdev->dev, "  CP_STALLED_STAT3 = 0x%08x\n",
4856                      RREG32(CP_STALLED_STAT3));
4857           dev_info(rdev->dev, "  CP_CPF_BUSY_STAT = 0x%08x\n",
4858                      RREG32(CP_CPF_BUSY_STAT));
4859           dev_info(rdev->dev, "  CP_CPF_STALLED_STAT1 = 0x%08x\n",
4860                      RREG32(CP_CPF_STALLED_STAT1));
4861           dev_info(rdev->dev, "  CP_CPF_STATUS = 0x%08x\n", RREG32(CP_CPF_STATUS));
4862           dev_info(rdev->dev, "  CP_CPC_BUSY_STAT = 0x%08x\n", RREG32(CP_CPC_BUSY_STAT));
4863           dev_info(rdev->dev, "  CP_CPC_STALLED_STAT1 = 0x%08x\n",
4864                      RREG32(CP_CPC_STALLED_STAT1));
4865           dev_info(rdev->dev, "  CP_CPC_STATUS = 0x%08x\n", RREG32(CP_CPC_STATUS));
4866 }
4867 
4868 /**
4869  * cik_gpu_check_soft_reset - check which blocks are busy
4870  *
4871  * @rdev: radeon_device pointer
4872  *
4873  * Check which blocks are busy and return the relevant reset
4874  * mask to be used by cik_gpu_soft_reset().
4875  * Returns a mask of the blocks to be reset.
4876  */
cik_gpu_check_soft_reset(struct radeon_device * rdev)4877 u32 cik_gpu_check_soft_reset(struct radeon_device *rdev)
4878 {
4879           u32 reset_mask = 0;
4880           u32 tmp;
4881 
4882           /* GRBM_STATUS */
4883           tmp = RREG32(GRBM_STATUS);
4884           if (tmp & (PA_BUSY | SC_BUSY |
4885                        BCI_BUSY | SX_BUSY |
4886                        TA_BUSY | VGT_BUSY |
4887                        DB_BUSY | CB_BUSY |
4888                        GDS_BUSY | SPI_BUSY |
4889                        IA_BUSY | IA_BUSY_NO_DMA))
4890                     reset_mask |= RADEON_RESET_GFX;
4891 
4892           if (tmp & (CP_BUSY | CP_COHERENCY_BUSY))
4893                     reset_mask |= RADEON_RESET_CP;
4894 
4895           /* GRBM_STATUS2 */
4896           tmp = RREG32(GRBM_STATUS2);
4897           if (tmp & RLC_BUSY)
4898                     reset_mask |= RADEON_RESET_RLC;
4899 
4900           /* SDMA0_STATUS_REG */
4901           tmp = RREG32(SDMA0_STATUS_REG + SDMA0_REGISTER_OFFSET);
4902           if (!(tmp & SDMA_IDLE))
4903                     reset_mask |= RADEON_RESET_DMA;
4904 
4905           /* SDMA1_STATUS_REG */
4906           tmp = RREG32(SDMA0_STATUS_REG + SDMA1_REGISTER_OFFSET);
4907           if (!(tmp & SDMA_IDLE))
4908                     reset_mask |= RADEON_RESET_DMA1;
4909 
4910           /* SRBM_STATUS2 */
4911           tmp = RREG32(SRBM_STATUS2);
4912           if (tmp & SDMA_BUSY)
4913                     reset_mask |= RADEON_RESET_DMA;
4914 
4915           if (tmp & SDMA1_BUSY)
4916                     reset_mask |= RADEON_RESET_DMA1;
4917 
4918           /* SRBM_STATUS */
4919           tmp = RREG32(SRBM_STATUS);
4920 
4921           if (tmp & IH_BUSY)
4922                     reset_mask |= RADEON_RESET_IH;
4923 
4924           if (tmp & SEM_BUSY)
4925                     reset_mask |= RADEON_RESET_SEM;
4926 
4927           if (tmp & GRBM_RQ_PENDING)
4928                     reset_mask |= RADEON_RESET_GRBM;
4929 
4930           if (tmp & VMC_BUSY)
4931                     reset_mask |= RADEON_RESET_VMC;
4932 
4933           if (tmp & (MCB_BUSY | MCB_NON_DISPLAY_BUSY |
4934                        MCC_BUSY | MCD_BUSY))
4935                     reset_mask |= RADEON_RESET_MC;
4936 
4937           if (evergreen_is_display_hung(rdev))
4938                     reset_mask |= RADEON_RESET_DISPLAY;
4939 
4940           /* Skip MC reset as it's mostly likely not hung, just busy */
4941           if (reset_mask & RADEON_RESET_MC) {
4942                     DRM_DEBUG("MC busy: 0x%08X, clearing.\n", reset_mask);
4943                     reset_mask &= ~RADEON_RESET_MC;
4944           }
4945 
4946           return reset_mask;
4947 }
4948 
4949 /**
4950  * cik_gpu_soft_reset - soft reset GPU
4951  *
4952  * @rdev: radeon_device pointer
4953  * @reset_mask: mask of which blocks to reset
4954  *
4955  * Soft reset the blocks specified in @reset_mask.
4956  */
cik_gpu_soft_reset(struct radeon_device * rdev,u32 reset_mask)4957 static void cik_gpu_soft_reset(struct radeon_device *rdev, u32 reset_mask)
4958 {
4959           struct evergreen_mc_save save;
4960           u32 grbm_soft_reset = 0, srbm_soft_reset = 0;
4961           u32 tmp;
4962 
4963           if (reset_mask == 0)
4964                     return;
4965 
4966           dev_info(rdev->dev, "GPU softreset: 0x%08X\n", reset_mask);
4967 
4968           cik_print_gpu_status_regs(rdev);
4969           dev_info(rdev->dev, "  VM_CONTEXT1_PROTECTION_FAULT_ADDR   0x%08X\n",
4970                      RREG32(VM_CONTEXT1_PROTECTION_FAULT_ADDR));
4971           dev_info(rdev->dev, "  VM_CONTEXT1_PROTECTION_FAULT_STATUS 0x%08X\n",
4972                      RREG32(VM_CONTEXT1_PROTECTION_FAULT_STATUS));
4973 
4974           /* disable CG/PG */
4975           cik_fini_pg(rdev);
4976           cik_fini_cg(rdev);
4977 
4978           /* stop the rlc */
4979           cik_rlc_stop(rdev);
4980 
4981           /* Disable GFX parsing/prefetching */
4982           WREG32(CP_ME_CNTL, CP_ME_HALT | CP_PFP_HALT | CP_CE_HALT);
4983 
4984           /* Disable MEC parsing/prefetching */
4985           WREG32(CP_MEC_CNTL, MEC_ME1_HALT | MEC_ME2_HALT);
4986 
4987           if (reset_mask & RADEON_RESET_DMA) {
4988                     /* sdma0 */
4989                     tmp = RREG32(SDMA0_ME_CNTL + SDMA0_REGISTER_OFFSET);
4990                     tmp |= SDMA_HALT;
4991                     WREG32(SDMA0_ME_CNTL + SDMA0_REGISTER_OFFSET, tmp);
4992           }
4993           if (reset_mask & RADEON_RESET_DMA1) {
4994                     /* sdma1 */
4995                     tmp = RREG32(SDMA0_ME_CNTL + SDMA1_REGISTER_OFFSET);
4996                     tmp |= SDMA_HALT;
4997                     WREG32(SDMA0_ME_CNTL + SDMA1_REGISTER_OFFSET, tmp);
4998           }
4999 
5000           evergreen_mc_stop(rdev, &save);
5001           if (evergreen_mc_wait_for_idle(rdev)) {
5002                     dev_warn(rdev->dev, "Wait for MC idle timedout !\n");
5003           }
5004 
5005           if (reset_mask & (RADEON_RESET_GFX | RADEON_RESET_COMPUTE | RADEON_RESET_CP))
5006                     grbm_soft_reset = SOFT_RESET_CP | SOFT_RESET_GFX;
5007 
5008           if (reset_mask & RADEON_RESET_CP) {
5009                     grbm_soft_reset |= SOFT_RESET_CP;
5010 
5011                     srbm_soft_reset |= SOFT_RESET_GRBM;
5012           }
5013 
5014           if (reset_mask & RADEON_RESET_DMA)
5015                     srbm_soft_reset |= SOFT_RESET_SDMA;
5016 
5017           if (reset_mask & RADEON_RESET_DMA1)
5018                     srbm_soft_reset |= SOFT_RESET_SDMA1;
5019 
5020           if (reset_mask & RADEON_RESET_DISPLAY)
5021                     srbm_soft_reset |= SOFT_RESET_DC;
5022 
5023           if (reset_mask & RADEON_RESET_RLC)
5024                     grbm_soft_reset |= SOFT_RESET_RLC;
5025 
5026           if (reset_mask & RADEON_RESET_SEM)
5027                     srbm_soft_reset |= SOFT_RESET_SEM;
5028 
5029           if (reset_mask & RADEON_RESET_IH)
5030                     srbm_soft_reset |= SOFT_RESET_IH;
5031 
5032           if (reset_mask & RADEON_RESET_GRBM)
5033                     srbm_soft_reset |= SOFT_RESET_GRBM;
5034 
5035           if (reset_mask & RADEON_RESET_VMC)
5036                     srbm_soft_reset |= SOFT_RESET_VMC;
5037 
5038           if (!(rdev->flags & RADEON_IS_IGP)) {
5039                     if (reset_mask & RADEON_RESET_MC)
5040                               srbm_soft_reset |= SOFT_RESET_MC;
5041           }
5042 
5043           if (grbm_soft_reset) {
5044                     tmp = RREG32(GRBM_SOFT_RESET);
5045                     tmp |= grbm_soft_reset;
5046                     dev_info(rdev->dev, "GRBM_SOFT_RESET=0x%08X\n", tmp);
5047                     WREG32(GRBM_SOFT_RESET, tmp);
5048                     tmp = RREG32(GRBM_SOFT_RESET);
5049 
5050                     udelay(50);
5051 
5052                     tmp &= ~grbm_soft_reset;
5053                     WREG32(GRBM_SOFT_RESET, tmp);
5054                     tmp = RREG32(GRBM_SOFT_RESET);
5055           }
5056 
5057           if (srbm_soft_reset) {
5058                     tmp = RREG32(SRBM_SOFT_RESET);
5059                     tmp |= srbm_soft_reset;
5060                     dev_info(rdev->dev, "SRBM_SOFT_RESET=0x%08X\n", tmp);
5061                     WREG32(SRBM_SOFT_RESET, tmp);
5062                     tmp = RREG32(SRBM_SOFT_RESET);
5063 
5064                     udelay(50);
5065 
5066                     tmp &= ~srbm_soft_reset;
5067                     WREG32(SRBM_SOFT_RESET, tmp);
5068                     tmp = RREG32(SRBM_SOFT_RESET);
5069           }
5070 
5071           /* Wait a little for things to settle down */
5072           udelay(50);
5073 
5074           evergreen_mc_resume(rdev, &save);
5075           udelay(50);
5076 
5077           cik_print_gpu_status_regs(rdev);
5078 }
5079 
5080 struct kv_reset_save_regs {
5081           u32 gmcon_reng_execute;
5082           u32 gmcon_misc;
5083           u32 gmcon_misc3;
5084 };
5085 
kv_save_regs_for_reset(struct radeon_device * rdev,struct kv_reset_save_regs * save)5086 static void kv_save_regs_for_reset(struct radeon_device *rdev,
5087                                            struct kv_reset_save_regs *save)
5088 {
5089           save->gmcon_reng_execute = RREG32(GMCON_RENG_EXECUTE);
5090           save->gmcon_misc = RREG32(GMCON_MISC);
5091           save->gmcon_misc3 = RREG32(GMCON_MISC3);
5092 
5093           WREG32(GMCON_RENG_EXECUTE, save->gmcon_reng_execute & ~RENG_EXECUTE_ON_PWR_UP);
5094           WREG32(GMCON_MISC, save->gmcon_misc & ~(RENG_EXECUTE_ON_REG_UPDATE |
5095                                                             STCTRL_STUTTER_EN));
5096 }
5097 
kv_restore_regs_for_reset(struct radeon_device * rdev,struct kv_reset_save_regs * save)5098 static void kv_restore_regs_for_reset(struct radeon_device *rdev,
5099                                               struct kv_reset_save_regs *save)
5100 {
5101           int i;
5102 
5103           WREG32(GMCON_PGFSM_WRITE, 0);
5104           WREG32(GMCON_PGFSM_CONFIG, 0x200010ff);
5105 
5106           for (i = 0; i < 5; i++)
5107                     WREG32(GMCON_PGFSM_WRITE, 0);
5108 
5109           WREG32(GMCON_PGFSM_WRITE, 0);
5110           WREG32(GMCON_PGFSM_CONFIG, 0x300010ff);
5111 
5112           for (i = 0; i < 5; i++)
5113                     WREG32(GMCON_PGFSM_WRITE, 0);
5114 
5115           WREG32(GMCON_PGFSM_WRITE, 0x210000);
5116           WREG32(GMCON_PGFSM_CONFIG, 0xa00010ff);
5117 
5118           for (i = 0; i < 5; i++)
5119                     WREG32(GMCON_PGFSM_WRITE, 0);
5120 
5121           WREG32(GMCON_PGFSM_WRITE, 0x21003);
5122           WREG32(GMCON_PGFSM_CONFIG, 0xb00010ff);
5123 
5124           for (i = 0; i < 5; i++)
5125                     WREG32(GMCON_PGFSM_WRITE, 0);
5126 
5127           WREG32(GMCON_PGFSM_WRITE, 0x2b00);
5128           WREG32(GMCON_PGFSM_CONFIG, 0xc00010ff);
5129 
5130           for (i = 0; i < 5; i++)
5131                     WREG32(GMCON_PGFSM_WRITE, 0);
5132 
5133           WREG32(GMCON_PGFSM_WRITE, 0);
5134           WREG32(GMCON_PGFSM_CONFIG, 0xd00010ff);
5135 
5136           for (i = 0; i < 5; i++)
5137                     WREG32(GMCON_PGFSM_WRITE, 0);
5138 
5139           WREG32(GMCON_PGFSM_WRITE, 0x420000);
5140           WREG32(GMCON_PGFSM_CONFIG, 0x100010ff);
5141 
5142           for (i = 0; i < 5; i++)
5143                     WREG32(GMCON_PGFSM_WRITE, 0);
5144 
5145           WREG32(GMCON_PGFSM_WRITE, 0x120202);
5146           WREG32(GMCON_PGFSM_CONFIG, 0x500010ff);
5147 
5148           for (i = 0; i < 5; i++)
5149                     WREG32(GMCON_PGFSM_WRITE, 0);
5150 
5151           WREG32(GMCON_PGFSM_WRITE, 0x3e3e36);
5152           WREG32(GMCON_PGFSM_CONFIG, 0x600010ff);
5153 
5154           for (i = 0; i < 5; i++)
5155                     WREG32(GMCON_PGFSM_WRITE, 0);
5156 
5157           WREG32(GMCON_PGFSM_WRITE, 0x373f3e);
5158           WREG32(GMCON_PGFSM_CONFIG, 0x700010ff);
5159 
5160           for (i = 0; i < 5; i++)
5161                     WREG32(GMCON_PGFSM_WRITE, 0);
5162 
5163           WREG32(GMCON_PGFSM_WRITE, 0x3e1332);
5164           WREG32(GMCON_PGFSM_CONFIG, 0xe00010ff);
5165 
5166           WREG32(GMCON_MISC3, save->gmcon_misc3);
5167           WREG32(GMCON_MISC, save->gmcon_misc);
5168           WREG32(GMCON_RENG_EXECUTE, save->gmcon_reng_execute);
5169 }
5170 
cik_gpu_pci_config_reset(struct radeon_device * rdev)5171 static void cik_gpu_pci_config_reset(struct radeon_device *rdev)
5172 {
5173           struct evergreen_mc_save save;
5174           struct kv_reset_save_regs kv_save = { 0 };
5175           u32 tmp, i;
5176 
5177           dev_info(rdev->dev, "GPU pci config reset\n");
5178 
5179           /* disable dpm? */
5180 
5181           /* disable cg/pg */
5182           cik_fini_pg(rdev);
5183           cik_fini_cg(rdev);
5184 
5185           /* Disable GFX parsing/prefetching */
5186           WREG32(CP_ME_CNTL, CP_ME_HALT | CP_PFP_HALT | CP_CE_HALT);
5187 
5188           /* Disable MEC parsing/prefetching */
5189           WREG32(CP_MEC_CNTL, MEC_ME1_HALT | MEC_ME2_HALT);
5190 
5191           /* sdma0 */
5192           tmp = RREG32(SDMA0_ME_CNTL + SDMA0_REGISTER_OFFSET);
5193           tmp |= SDMA_HALT;
5194           WREG32(SDMA0_ME_CNTL + SDMA0_REGISTER_OFFSET, tmp);
5195           /* sdma1 */
5196           tmp = RREG32(SDMA0_ME_CNTL + SDMA1_REGISTER_OFFSET);
5197           tmp |= SDMA_HALT;
5198           WREG32(SDMA0_ME_CNTL + SDMA1_REGISTER_OFFSET, tmp);
5199           /* XXX other engines? */
5200 
5201           /* halt the rlc, disable cp internal ints */
5202           cik_rlc_stop(rdev);
5203 
5204           udelay(50);
5205 
5206           /* disable mem access */
5207           evergreen_mc_stop(rdev, &save);
5208           if (evergreen_mc_wait_for_idle(rdev)) {
5209                     dev_warn(rdev->dev, "Wait for MC idle timed out !\n");
5210           }
5211 
5212           if (rdev->flags & RADEON_IS_IGP)
5213                     kv_save_regs_for_reset(rdev, &kv_save);
5214 
5215           /* disable BM */
5216           pci_clear_master(rdev->pdev);
5217           /* reset */
5218           radeon_pci_config_reset(rdev);
5219 
5220           udelay(100);
5221 
5222           /* wait for asic to come out of reset */
5223           for (i = 0; i < rdev->usec_timeout; i++) {
5224                     if (RREG32(CONFIG_MEMSIZE) != 0xffffffff)
5225                               break;
5226                     udelay(1);
5227           }
5228 
5229           /* does asic init need to be run first??? */
5230           if (rdev->flags & RADEON_IS_IGP)
5231                     kv_restore_regs_for_reset(rdev, &kv_save);
5232 }
5233 
5234 /**
5235  * cik_asic_reset - soft reset GPU
5236  *
5237  * @rdev: radeon_device pointer
5238  * @hard: force hard reset
5239  *
5240  * Look up which blocks are hung and attempt
5241  * to reset them.
5242  * Returns 0 for success.
5243  */
cik_asic_reset(struct radeon_device * rdev,bool hard)5244 int cik_asic_reset(struct radeon_device *rdev, bool hard)
5245 {
5246           u32 reset_mask;
5247 
5248           if (hard) {
5249                     cik_gpu_pci_config_reset(rdev);
5250                     return 0;
5251           }
5252 
5253           reset_mask = cik_gpu_check_soft_reset(rdev);
5254 
5255           if (reset_mask)
5256                     r600_set_bios_scratch_engine_hung(rdev, true);
5257 
5258           /* try soft reset */
5259           cik_gpu_soft_reset(rdev, reset_mask);
5260 
5261           reset_mask = cik_gpu_check_soft_reset(rdev);
5262 
5263           /* try pci config reset */
5264           if (reset_mask && radeon_hard_reset)
5265                     cik_gpu_pci_config_reset(rdev);
5266 
5267           reset_mask = cik_gpu_check_soft_reset(rdev);
5268 
5269           if (!reset_mask)
5270                     r600_set_bios_scratch_engine_hung(rdev, false);
5271 
5272           return 0;
5273 }
5274 
5275 /**
5276  * cik_gfx_is_lockup - check if the 3D engine is locked up
5277  *
5278  * @rdev: radeon_device pointer
5279  * @ring: radeon_ring structure holding ring information
5280  *
5281  * Check if the 3D engine is locked up (CIK).
5282  * Returns true if the engine is locked, false if not.
5283  */
cik_gfx_is_lockup(struct radeon_device * rdev,struct radeon_ring * ring)5284 bool cik_gfx_is_lockup(struct radeon_device *rdev, struct radeon_ring *ring)
5285 {
5286           u32 reset_mask = cik_gpu_check_soft_reset(rdev);
5287 
5288           if (!(reset_mask & (RADEON_RESET_GFX |
5289                                   RADEON_RESET_COMPUTE |
5290                                   RADEON_RESET_CP))) {
5291                     radeon_ring_lockup_update(rdev, ring);
5292                     return false;
5293           }
5294           return radeon_ring_test_lockup(rdev, ring);
5295 }
5296 
5297 /* MC */
5298 /**
5299  * cik_mc_program - program the GPU memory controller
5300  *
5301  * @rdev: radeon_device pointer
5302  *
5303  * Set the location of vram, gart, and AGP in the GPU's
5304  * physical address space (CIK).
5305  */
cik_mc_program(struct radeon_device * rdev)5306 static void cik_mc_program(struct radeon_device *rdev)
5307 {
5308           struct evergreen_mc_save save;
5309           u32 tmp;
5310           int i, j;
5311 
5312           /* Initialize HDP */
5313           for (i = 0, j = 0; i < 32; i++, j += 0x18) {
5314                     WREG32((0x2c14 + j), 0x00000000);
5315                     WREG32((0x2c18 + j), 0x00000000);
5316                     WREG32((0x2c1c + j), 0x00000000);
5317                     WREG32((0x2c20 + j), 0x00000000);
5318                     WREG32((0x2c24 + j), 0x00000000);
5319           }
5320           WREG32(HDP_REG_COHERENCY_FLUSH_CNTL, 0);
5321 
5322           evergreen_mc_stop(rdev, &save);
5323           if (radeon_mc_wait_for_idle(rdev)) {
5324                     dev_warn(rdev->dev, "Wait for MC idle timedout !\n");
5325           }
5326           /* Lockout access through VGA aperture*/
5327           WREG32(VGA_HDP_CONTROL, VGA_MEMORY_DISABLE);
5328           /* Update configuration */
5329           WREG32(MC_VM_SYSTEM_APERTURE_LOW_ADDR,
5330                  rdev->mc.vram_start >> 12);
5331           WREG32(MC_VM_SYSTEM_APERTURE_HIGH_ADDR,
5332                  rdev->mc.vram_end >> 12);
5333           WREG32(MC_VM_SYSTEM_APERTURE_DEFAULT_ADDR,
5334                  rdev->vram_scratch.gpu_addr >> 12);
5335           tmp = ((rdev->mc.vram_end >> 24) & 0xFFFF) << 16;
5336           tmp |= ((rdev->mc.vram_start >> 24) & 0xFFFF);
5337           WREG32(MC_VM_FB_LOCATION, tmp);
5338           /* XXX double check these! */
5339           WREG32(HDP_NONSURFACE_BASE, (rdev->mc.vram_start >> 8));
5340           WREG32(HDP_NONSURFACE_INFO, (2 << 7) | (1 << 30));
5341           WREG32(HDP_NONSURFACE_SIZE, 0x3FFFFFFF);
5342           WREG32(MC_VM_AGP_BASE, 0);
5343           WREG32(MC_VM_AGP_TOP, 0x0FFFFFFF);
5344           WREG32(MC_VM_AGP_BOT, 0x0FFFFFFF);
5345           if (radeon_mc_wait_for_idle(rdev)) {
5346                     dev_warn(rdev->dev, "Wait for MC idle timedout !\n");
5347           }
5348           evergreen_mc_resume(rdev, &save);
5349           /* we need to own VRAM, so turn off the VGA renderer here
5350            * to stop it overwriting our objects */
5351           rv515_vga_render_disable(rdev);
5352 }
5353 
5354 /**
5355  * cik_mc_init - initialize the memory controller driver params
5356  *
5357  * @rdev: radeon_device pointer
5358  *
5359  * Look up the amount of vram, vram width, and decide how to place
5360  * vram and gart within the GPU's physical address space (CIK).
5361  * Returns 0 for success.
5362  */
cik_mc_init(struct radeon_device * rdev)5363 static int cik_mc_init(struct radeon_device *rdev)
5364 {
5365           u32 tmp;
5366           int chansize, numchan;
5367 
5368           /* Get VRAM informations */
5369           rdev->mc.vram_is_ddr = true;
5370           tmp = RREG32(MC_ARB_RAMCFG);
5371           if (tmp & CHANSIZE_MASK) {
5372                     chansize = 64;
5373           } else {
5374                     chansize = 32;
5375           }
5376           tmp = RREG32(MC_SHARED_CHMAP);
5377           switch ((tmp & NOOFCHAN_MASK) >> NOOFCHAN_SHIFT) {
5378           case 0:
5379           default:
5380                     numchan = 1;
5381                     break;
5382           case 1:
5383                     numchan = 2;
5384                     break;
5385           case 2:
5386                     numchan = 4;
5387                     break;
5388           case 3:
5389                     numchan = 8;
5390                     break;
5391           case 4:
5392                     numchan = 3;
5393                     break;
5394           case 5:
5395                     numchan = 6;
5396                     break;
5397           case 6:
5398                     numchan = 10;
5399                     break;
5400           case 7:
5401                     numchan = 12;
5402                     break;
5403           case 8:
5404                     numchan = 16;
5405                     break;
5406           }
5407           rdev->mc.vram_width = numchan * chansize;
5408           /* Could aper size report 0 ? */
5409           rdev->mc.aper_base = pci_resource_start(rdev->pdev, 0);
5410           rdev->mc.aper_size = pci_resource_len(rdev->pdev, 0);
5411           /* size in MB on si */
5412           rdev->mc.mc_vram_size = RREG32(CONFIG_MEMSIZE) * 1024ULL * 1024ULL;
5413           rdev->mc.real_vram_size = RREG32(CONFIG_MEMSIZE) * 1024ULL * 1024ULL;
5414           rdev->mc.visible_vram_size = rdev->mc.aper_size;
5415           si_vram_gtt_location(rdev, &rdev->mc);
5416           radeon_update_bandwidth_info(rdev);
5417 
5418           return 0;
5419 }
5420 
5421 /*
5422  * GART
5423  * VMID 0 is the physical GPU addresses as used by the kernel.
5424  * VMIDs 1-15 are used for userspace clients and are handled
5425  * by the radeon vm/hsa code.
5426  */
5427 /**
5428  * cik_pcie_gart_tlb_flush - gart tlb flush callback
5429  *
5430  * @rdev: radeon_device pointer
5431  *
5432  * Flush the TLB for the VMID 0 page table (CIK).
5433  */
cik_pcie_gart_tlb_flush(struct radeon_device * rdev)5434 void cik_pcie_gart_tlb_flush(struct radeon_device *rdev)
5435 {
5436           /* flush hdp cache */
5437           WREG32(HDP_MEM_COHERENCY_FLUSH_CNTL, 0);
5438 
5439           /* bits 0-15 are the VM contexts0-15 */
5440           WREG32(VM_INVALIDATE_REQUEST, 0x1);
5441 }
5442 
5443 /**
5444  * cik_pcie_gart_enable - gart enable
5445  *
5446  * @rdev: radeon_device pointer
5447  *
5448  * This sets up the TLBs, programs the page tables for VMID0,
5449  * sets up the hw for VMIDs 1-15 which are allocated on
5450  * demand, and sets up the global locations for the LDS, GDS,
5451  * and GPUVM for FSA64 clients (CIK).
5452  * Returns 0 for success, errors for failure.
5453  */
cik_pcie_gart_enable(struct radeon_device * rdev)5454 static int cik_pcie_gart_enable(struct radeon_device *rdev)
5455 {
5456           int r, i;
5457 
5458           if (rdev->gart.robj == NULL) {
5459                     dev_err(rdev->dev, "No VRAM object for PCIE GART.\n");
5460                     return -EINVAL;
5461           }
5462           r = radeon_gart_table_vram_pin(rdev);
5463           if (r)
5464                     return r;
5465           /* Setup TLB control */
5466           WREG32(MC_VM_MX_L1_TLB_CNTL,
5467                  (0xA << 7) |
5468                  ENABLE_L1_TLB |
5469                  ENABLE_L1_FRAGMENT_PROCESSING |
5470                  SYSTEM_ACCESS_MODE_NOT_IN_SYS |
5471                  ENABLE_ADVANCED_DRIVER_MODEL |
5472                  SYSTEM_APERTURE_UNMAPPED_ACCESS_PASS_THRU);
5473           /* Setup L2 cache */
5474           WREG32(VM_L2_CNTL, ENABLE_L2_CACHE |
5475                  ENABLE_L2_FRAGMENT_PROCESSING |
5476                  ENABLE_L2_PTE_CACHE_LRU_UPDATE_BY_WRITE |
5477                  ENABLE_L2_PDE0_CACHE_LRU_UPDATE_BY_WRITE |
5478                  EFFECTIVE_L2_QUEUE_SIZE(7) |
5479                  CONTEXT1_IDENTITY_ACCESS_MODE(1));
5480           WREG32(VM_L2_CNTL2, INVALIDATE_ALL_L1_TLBS | INVALIDATE_L2_CACHE);
5481           WREG32(VM_L2_CNTL3, L2_CACHE_BIGK_ASSOCIATIVITY |
5482                  BANK_SELECT(4) |
5483                  L2_CACHE_BIGK_FRAGMENT_SIZE(4));
5484           /* setup context0 */
5485           WREG32(VM_CONTEXT0_PAGE_TABLE_START_ADDR, rdev->mc.gtt_start >> 12);
5486           WREG32(VM_CONTEXT0_PAGE_TABLE_END_ADDR, rdev->mc.gtt_end >> 12);
5487           WREG32(VM_CONTEXT0_PAGE_TABLE_BASE_ADDR, rdev->gart.table_addr >> 12);
5488           WREG32(VM_CONTEXT0_PROTECTION_FAULT_DEFAULT_ADDR,
5489                               (u32)(rdev->dummy_page.addr >> 12));
5490           WREG32(VM_CONTEXT0_CNTL2, 0);
5491           WREG32(VM_CONTEXT0_CNTL, (ENABLE_CONTEXT | PAGE_TABLE_DEPTH(0) |
5492                                           RANGE_PROTECTION_FAULT_ENABLE_DEFAULT));
5493 
5494           WREG32(0x15D4, 0);
5495           WREG32(0x15D8, 0);
5496           WREG32(0x15DC, 0);
5497 
5498           /* restore context1-15 */
5499           /* set vm size, must be a multiple of 4 */
5500           WREG32(VM_CONTEXT1_PAGE_TABLE_START_ADDR, 0);
5501           WREG32(VM_CONTEXT1_PAGE_TABLE_END_ADDR, rdev->vm_manager.max_pfn - 1);
5502           for (i = 1; i < 16; i++) {
5503                     if (i < 8)
5504                               WREG32(VM_CONTEXT0_PAGE_TABLE_BASE_ADDR + (i << 2),
5505                                      rdev->vm_manager.saved_table_addr[i]);
5506                     else
5507                               WREG32(VM_CONTEXT8_PAGE_TABLE_BASE_ADDR + ((i - 8) << 2),
5508                                      rdev->vm_manager.saved_table_addr[i]);
5509           }
5510 
5511           /* enable context1-15 */
5512           WREG32(VM_CONTEXT1_PROTECTION_FAULT_DEFAULT_ADDR,
5513                  (u32)(rdev->dummy_page.addr >> 12));
5514           WREG32(VM_CONTEXT1_CNTL2, 4);
5515           WREG32(VM_CONTEXT1_CNTL, ENABLE_CONTEXT | PAGE_TABLE_DEPTH(1) |
5516                                         PAGE_TABLE_BLOCK_SIZE(radeon_vm_block_size - 9) |
5517                                         RANGE_PROTECTION_FAULT_ENABLE_INTERRUPT |
5518                                         RANGE_PROTECTION_FAULT_ENABLE_DEFAULT |
5519                                         DUMMY_PAGE_PROTECTION_FAULT_ENABLE_INTERRUPT |
5520                                         DUMMY_PAGE_PROTECTION_FAULT_ENABLE_DEFAULT |
5521                                         PDE0_PROTECTION_FAULT_ENABLE_INTERRUPT |
5522                                         PDE0_PROTECTION_FAULT_ENABLE_DEFAULT |
5523                                         VALID_PROTECTION_FAULT_ENABLE_INTERRUPT |
5524                                         VALID_PROTECTION_FAULT_ENABLE_DEFAULT |
5525                                         READ_PROTECTION_FAULT_ENABLE_INTERRUPT |
5526                                         READ_PROTECTION_FAULT_ENABLE_DEFAULT |
5527                                         WRITE_PROTECTION_FAULT_ENABLE_INTERRUPT |
5528                                         WRITE_PROTECTION_FAULT_ENABLE_DEFAULT);
5529 
5530           if (rdev->family == CHIP_KAVERI) {
5531                     u32 tmp = RREG32(CHUB_CONTROL);
5532                     tmp &= ~BYPASS_VM;
5533                     WREG32(CHUB_CONTROL, tmp);
5534           }
5535 
5536           /* XXX SH_MEM regs */
5537           /* where to put LDS, scratch, GPUVM in FSA64 space */
5538           mutex_lock(&rdev->srbm_mutex);
5539           for (i = 0; i < 16; i++) {
5540                     cik_srbm_select(rdev, 0, 0, 0, i);
5541                     /* CP and shaders */
5542                     WREG32(SH_MEM_CONFIG, SH_MEM_CONFIG_GFX_DEFAULT);
5543                     WREG32(SH_MEM_APE1_BASE, 1);
5544                     WREG32(SH_MEM_APE1_LIMIT, 0);
5545                     WREG32(SH_MEM_BASES, 0);
5546                     /* SDMA GFX */
5547                     WREG32(SDMA0_GFX_VIRTUAL_ADDR + SDMA0_REGISTER_OFFSET, 0);
5548                     WREG32(SDMA0_GFX_APE1_CNTL + SDMA0_REGISTER_OFFSET, 0);
5549                     WREG32(SDMA0_GFX_VIRTUAL_ADDR + SDMA1_REGISTER_OFFSET, 0);
5550                     WREG32(SDMA0_GFX_APE1_CNTL + SDMA1_REGISTER_OFFSET, 0);
5551                     /* XXX SDMA RLC - todo */
5552           }
5553           cik_srbm_select(rdev, 0, 0, 0, 0);
5554           mutex_unlock(&rdev->srbm_mutex);
5555 
5556           cik_pcie_gart_tlb_flush(rdev);
5557           DRM_INFO("PCIE GART of %uM enabled (table at 0x%016llX).\n",
5558                      (unsigned)(rdev->mc.gtt_size >> 20),
5559                      (unsigned long long)rdev->gart.table_addr);
5560           rdev->gart.ready = true;
5561           return 0;
5562 }
5563 
5564 /**
5565  * cik_pcie_gart_disable - gart disable
5566  *
5567  * @rdev: radeon_device pointer
5568  *
5569  * This disables all VM page table (CIK).
5570  */
cik_pcie_gart_disable(struct radeon_device * rdev)5571 static void cik_pcie_gart_disable(struct radeon_device *rdev)
5572 {
5573           unsigned i;
5574 
5575           for (i = 1; i < 16; ++i) {
5576                     uint32_t reg;
5577                     if (i < 8)
5578                               reg = VM_CONTEXT0_PAGE_TABLE_BASE_ADDR + (i << 2);
5579                     else
5580                               reg = VM_CONTEXT8_PAGE_TABLE_BASE_ADDR + ((i - 8) << 2);
5581                     rdev->vm_manager.saved_table_addr[i] = RREG32(reg);
5582           }
5583 
5584           /* Disable all tables */
5585           WREG32(VM_CONTEXT0_CNTL, 0);
5586           WREG32(VM_CONTEXT1_CNTL, 0);
5587           /* Setup TLB control */
5588           WREG32(MC_VM_MX_L1_TLB_CNTL, SYSTEM_ACCESS_MODE_NOT_IN_SYS |
5589                  SYSTEM_APERTURE_UNMAPPED_ACCESS_PASS_THRU);
5590           /* Setup L2 cache */
5591           WREG32(VM_L2_CNTL,
5592                  ENABLE_L2_FRAGMENT_PROCESSING |
5593                  ENABLE_L2_PTE_CACHE_LRU_UPDATE_BY_WRITE |
5594                  ENABLE_L2_PDE0_CACHE_LRU_UPDATE_BY_WRITE |
5595                  EFFECTIVE_L2_QUEUE_SIZE(7) |
5596                  CONTEXT1_IDENTITY_ACCESS_MODE(1));
5597           WREG32(VM_L2_CNTL2, 0);
5598           WREG32(VM_L2_CNTL3, L2_CACHE_BIGK_ASSOCIATIVITY |
5599                  L2_CACHE_BIGK_FRAGMENT_SIZE(6));
5600           radeon_gart_table_vram_unpin(rdev);
5601 }
5602 
/**
 * cik_pcie_gart_fini - vm fini callback
 *
 * @rdev: radeon_device pointer
 *
 * Tears down the driver GART/VM setup (CIK): the GART is disabled
 * first, then its VRAM table is freed, then the common GART state is
 * finalized.
 */
static void cik_pcie_gart_fini(struct radeon_device *rdev)
{
	/* hardware must stop using the table before it is released */
	cik_pcie_gart_disable(rdev);

	radeon_gart_table_vram_free(rdev);
	radeon_gart_fini(rdev);
}
5616 
5617 /* vm parser */
/**
 * cik_ib_parse - vm ib_parse callback
 *
 * @rdev: radeon_device pointer (unused)
 * @ib: indirect buffer pointer (unused)
 *
 * CIK uses hw IB checking so this is a nop (CIK).
 * Always returns 0.
 */
int cik_ib_parse(struct radeon_device *rdev, struct radeon_ib *ib)
{
	/* nothing to validate in software */
	return 0;
}
5630 
5631 /*
5632  * vm
5633  * VMID 0 is the physical GPU addresses as used by the kernel.
5634  * VMIDs 1-15 are used for userspace clients and are handled
5635  * by the radeon vm/hsa code.
5636  */
5637 /**
5638  * cik_vm_init - cik vm init callback
5639  *
5640  * @rdev: radeon_device pointer
5641  *
5642  * Inits cik specific vm parameters (number of VMs, base of vram for
5643  * VMIDs 1-15) (CIK).
5644  * Returns 0 for success.
5645  */
cik_vm_init(struct radeon_device * rdev)5646 int cik_vm_init(struct radeon_device *rdev)
5647 {
5648           /*
5649            * number of VMs
5650            * VMID 0 is reserved for System
5651            * radeon graphics/compute will use VMIDs 1-15
5652            */
5653           rdev->vm_manager.nvm = 16;
5654           /* base offset of vram pages */
5655           if (rdev->flags & RADEON_IS_IGP) {
5656                     u64 tmp = RREG32(MC_VM_FB_OFFSET);
5657                     tmp <<= 22;
5658                     rdev->vm_manager.vram_base_offset = tmp;
5659           } else
5660                     rdev->vm_manager.vram_base_offset = 0;
5661 
5662           return 0;
5663 }
5664 
/**
 * cik_vm_fini - cik vm fini callback
 *
 * @rdev: radeon_device pointer (unused)
 *
 * Tear down any asic specific VM setup (CIK).
 * There is currently nothing to undo, so the body is empty.
 */
void cik_vm_fini(struct radeon_device *rdev)
{
	/* intentionally empty */
}
5675 
5676 /**
5677  * cik_vm_decode_fault - print human readable fault info
5678  *
5679  * @rdev: radeon_device pointer
5680  * @status: VM_CONTEXT1_PROTECTION_FAULT_STATUS register value
5681  * @addr: VM_CONTEXT1_PROTECTION_FAULT_ADDR register value
5682  *
5683  * Print human readable fault information (CIK).
5684  */
cik_vm_decode_fault(struct radeon_device * rdev,u32 status,u32 addr,u32 mc_client)5685 static void cik_vm_decode_fault(struct radeon_device *rdev,
5686                                         u32 status, u32 addr, u32 mc_client)
5687 {
5688           u32 mc_id;
5689           u32 vmid = (status & FAULT_VMID_MASK) >> FAULT_VMID_SHIFT;
5690           u32 protections = (status & PROTECTIONS_MASK) >> PROTECTIONS_SHIFT;
5691           char block[5] = { mc_client >> 24, (mc_client >> 16) & 0xff,
5692                     (mc_client >> 8) & 0xff, mc_client & 0xff, 0 };
5693 
5694           if (rdev->family == CHIP_HAWAII)
5695                     mc_id = (status & HAWAII_MEMORY_CLIENT_ID_MASK) >> MEMORY_CLIENT_ID_SHIFT;
5696           else
5697                     mc_id = (status & MEMORY_CLIENT_ID_MASK) >> MEMORY_CLIENT_ID_SHIFT;
5698 
5699           printk("VM fault (0x%02x, vmid %d) at page %u, %s from '%s' (0x%08x) (%d)\n",
5700                  protections, vmid, addr,
5701                  (status & MEMORY_CLIENT_RW_MASK) ? "write" : "read",
5702                  block, mc_client, mc_id);
5703 }
5704 
5705 /**
5706  * cik_vm_flush - cik vm flush using the CP
5707  *
5708  * @rdev: radeon_device pointer
5709  *
5710  * Update the page table base and flush the VM TLB
5711  * using the CP (CIK).
5712  */
cik_vm_flush(struct radeon_device * rdev,struct radeon_ring * ring,unsigned vm_id,uint64_t pd_addr)5713 void cik_vm_flush(struct radeon_device *rdev, struct radeon_ring *ring,
5714                       unsigned vm_id, uint64_t pd_addr)
5715 {
5716           int usepfp = (ring->idx == RADEON_RING_TYPE_GFX_INDEX);
5717 
5718           radeon_ring_write(ring, PACKET3(PACKET3_WRITE_DATA, 3));
5719           radeon_ring_write(ring, (WRITE_DATA_ENGINE_SEL(usepfp) |
5720                                          WRITE_DATA_DST_SEL(0)));
5721           if (vm_id < 8) {
5722                     radeon_ring_write(ring,
5723                                           (VM_CONTEXT0_PAGE_TABLE_BASE_ADDR + (vm_id << 2)) >> 2);
5724           } else {
5725                     radeon_ring_write(ring,
5726                                           (VM_CONTEXT8_PAGE_TABLE_BASE_ADDR + ((vm_id - 8) << 2)) >> 2);
5727           }
5728           radeon_ring_write(ring, 0);
5729           radeon_ring_write(ring, pd_addr >> 12);
5730 
5731           /* update SH_MEM_* regs */
5732           radeon_ring_write(ring, PACKET3(PACKET3_WRITE_DATA, 3));
5733           radeon_ring_write(ring, (WRITE_DATA_ENGINE_SEL(usepfp) |
5734                                          WRITE_DATA_DST_SEL(0)));
5735           radeon_ring_write(ring, SRBM_GFX_CNTL >> 2);
5736           radeon_ring_write(ring, 0);
5737           radeon_ring_write(ring, VMID(vm_id));
5738 
5739           radeon_ring_write(ring, PACKET3(PACKET3_WRITE_DATA, 6));
5740           radeon_ring_write(ring, (WRITE_DATA_ENGINE_SEL(usepfp) |
5741                                          WRITE_DATA_DST_SEL(0)));
5742           radeon_ring_write(ring, SH_MEM_BASES >> 2);
5743           radeon_ring_write(ring, 0);
5744 
5745           radeon_ring_write(ring, 0); /* SH_MEM_BASES */
5746           radeon_ring_write(ring, SH_MEM_CONFIG_GFX_DEFAULT); /* SH_MEM_CONFIG */
5747           radeon_ring_write(ring, 1); /* SH_MEM_APE1_BASE */
5748           radeon_ring_write(ring, 0); /* SH_MEM_APE1_LIMIT */
5749 
5750           radeon_ring_write(ring, PACKET3(PACKET3_WRITE_DATA, 3));
5751           radeon_ring_write(ring, (WRITE_DATA_ENGINE_SEL(usepfp) |
5752                                          WRITE_DATA_DST_SEL(0)));
5753           radeon_ring_write(ring, SRBM_GFX_CNTL >> 2);
5754           radeon_ring_write(ring, 0);
5755           radeon_ring_write(ring, VMID(0));
5756 
5757           /* HDP flush */
5758           cik_hdp_flush_cp_ring_emit(rdev, ring->idx);
5759 
5760           /* bits 0-15 are the VM contexts0-15 */
5761           radeon_ring_write(ring, PACKET3(PACKET3_WRITE_DATA, 3));
5762           radeon_ring_write(ring, (WRITE_DATA_ENGINE_SEL(usepfp) |
5763                                          WRITE_DATA_DST_SEL(0)));
5764           radeon_ring_write(ring, VM_INVALIDATE_REQUEST >> 2);
5765           radeon_ring_write(ring, 0);
5766           radeon_ring_write(ring, 1 << vm_id);
5767 
5768           /* wait for the invalidate to complete */
5769           radeon_ring_write(ring, PACKET3(PACKET3_WAIT_REG_MEM, 5));
5770           radeon_ring_write(ring, (WAIT_REG_MEM_OPERATION(0) | /* wait */
5771                                          WAIT_REG_MEM_FUNCTION(0) |  /* always */
5772                                          WAIT_REG_MEM_ENGINE(0))); /* me */
5773           radeon_ring_write(ring, VM_INVALIDATE_REQUEST >> 2);
5774           radeon_ring_write(ring, 0);
5775           radeon_ring_write(ring, 0); /* ref */
5776           radeon_ring_write(ring, 0); /* mask */
5777           radeon_ring_write(ring, 0x20); /* poll interval */
5778 
5779           /* compute doesn't have PFP */
5780           if (usepfp) {
5781                     /* sync PFP to ME, otherwise we might get invalid PFP reads */
5782                     radeon_ring_write(ring, PACKET3(PACKET3_PFP_SYNC_ME, 0));
5783                     radeon_ring_write(ring, 0x0);
5784           }
5785 }
5786 
5787 /*
5788  * RLC
5789  * The RLC is a multi-purpose microengine that handles a
5790  * variety of functions, the most important of which is
5791  * the interrupt controller.
5792  */
/*
 * Set or clear the context busy/empty interrupt enable bits in
 * CP_INT_CNTL_RING0, leaving the other bits of the register untouched.
 */
static void cik_enable_gui_idle_interrupt(struct radeon_device *rdev,
					  bool enable)
{
	const u32 ctx_ints = CNTX_BUSY_INT_ENABLE | CNTX_EMPTY_INT_ENABLE;
	u32 cntl = RREG32(CP_INT_CNTL_RING0);

	cntl = enable ? (cntl | ctx_ints) : (cntl & ~ctx_ints);
	WREG32(CP_INT_CNTL_RING0, cntl);
}
5804 
/*
 * Set or clear LOAD_BALANCE_ENABLE in RLC_LB_CNTL, leaving the other
 * bits of the register untouched.
 */
static void cik_enable_lbpw(struct radeon_device *rdev, bool enable)
{
	u32 lb_cntl = RREG32(RLC_LB_CNTL);

	lb_cntl = enable ? (lb_cntl | LOAD_BALANCE_ENABLE)
			 : (lb_cntl & ~LOAD_BALANCE_ENABLE);
	WREG32(RLC_LB_CNTL, lb_cntl);
}
5816 
cik_wait_for_rlc_serdes(struct radeon_device * rdev)5817 static void cik_wait_for_rlc_serdes(struct radeon_device *rdev)
5818 {
5819           u32 i, j, k;
5820           u32 mask;
5821 
5822           mutex_lock(&rdev->grbm_idx_mutex);
5823           for (i = 0; i < rdev->config.cik.max_shader_engines; i++) {
5824                     for (j = 0; j < rdev->config.cik.max_sh_per_se; j++) {
5825                               cik_select_se_sh(rdev, i, j);
5826                               for (k = 0; k < rdev->usec_timeout; k++) {
5827                                         if (RREG32(RLC_SERDES_CU_MASTER_BUSY) == 0)
5828                                                   break;
5829                                         udelay(1);
5830                               }
5831                     }
5832           }
5833           cik_select_se_sh(rdev, 0xffffffff, 0xffffffff);
5834           mutex_unlock(&rdev->grbm_idx_mutex);
5835 
5836           mask = SE_MASTER_BUSY_MASK | GC_MASTER_BUSY | TC0_MASTER_BUSY | TC1_MASTER_BUSY;
5837           for (k = 0; k < rdev->usec_timeout; k++) {
5838                     if ((RREG32(RLC_SERDES_NONCU_MASTER_BUSY) & mask) == 0)
5839                               break;
5840                     udelay(1);
5841           }
5842 }
5843 
/* Write @rlc to RLC_CNTL unless the register already holds that value. */
static void cik_update_rlc(struct radeon_device *rdev, u32 rlc)
{
	if (RREG32(RLC_CNTL) != rlc)
		WREG32(RLC_CNTL, rlc);
}
5852 
cik_halt_rlc(struct radeon_device * rdev)5853 static u32 cik_halt_rlc(struct radeon_device *rdev)
5854 {
5855           u32 data, orig;
5856 
5857           orig = data = RREG32(RLC_CNTL);
5858 
5859           if (data & RLC_ENABLE) {
5860                     u32 i;
5861 
5862                     data &= ~RLC_ENABLE;
5863                     WREG32(RLC_CNTL, data);
5864 
5865                     for (i = 0; i < rdev->usec_timeout; i++) {
5866                               if ((RREG32(RLC_GPM_STAT) & RLC_GPM_BUSY) == 0)
5867                                         break;
5868                               udelay(1);
5869                     }
5870 
5871                     cik_wait_for_rlc_serdes(rdev);
5872           }
5873 
5874           return orig;
5875 }
5876 
cik_enter_rlc_safe_mode(struct radeon_device * rdev)5877 void cik_enter_rlc_safe_mode(struct radeon_device *rdev)
5878 {
5879           u32 tmp, i, mask;
5880 
5881           tmp = REQ | MESSAGE(MSG_ENTER_RLC_SAFE_MODE);
5882           WREG32(RLC_GPR_REG2, tmp);
5883 
5884           mask = GFX_POWER_STATUS | GFX_CLOCK_STATUS;
5885           for (i = 0; i < rdev->usec_timeout; i++) {
5886                     if ((RREG32(RLC_GPM_STAT) & mask) == mask)
5887                               break;
5888                     udelay(1);
5889           }
5890 
5891           for (i = 0; i < rdev->usec_timeout; i++) {
5892                     if ((RREG32(RLC_GPR_REG2) & REQ) == 0)
5893                               break;
5894                     udelay(1);
5895           }
5896 }
5897 
cik_exit_rlc_safe_mode(struct radeon_device * rdev)5898 void cik_exit_rlc_safe_mode(struct radeon_device *rdev)
5899 {
5900           u32 tmp;
5901 
5902           tmp = REQ | MESSAGE(MSG_EXIT_RLC_SAFE_MODE);
5903           WREG32(RLC_GPR_REG2, tmp);
5904 }
5905 
5906 /**
5907  * cik_rlc_stop - stop the RLC ME
5908  *
5909  * @rdev: radeon_device pointer
5910  *
5911  * Halt the RLC ME (MicroEngine) (CIK).
5912  */
cik_rlc_stop(struct radeon_device * rdev)5913 static void cik_rlc_stop(struct radeon_device *rdev)
5914 {
5915           WREG32(RLC_CNTL, 0);
5916 
5917           cik_enable_gui_idle_interrupt(rdev, false);
5918 
5919           cik_wait_for_rlc_serdes(rdev);
5920 }
5921 
5922 /**
5923  * cik_rlc_start - start the RLC ME
5924  *
5925  * @rdev: radeon_device pointer
5926  *
5927  * Unhalt the RLC ME (MicroEngine) (CIK).
5928  */
cik_rlc_start(struct radeon_device * rdev)5929 static void cik_rlc_start(struct radeon_device *rdev)
5930 {
5931           WREG32(RLC_CNTL, RLC_ENABLE);
5932 
5933           cik_enable_gui_idle_interrupt(rdev, true);
5934 
5935           udelay(50);
5936 }
5937 
5938 /**
5939  * cik_rlc_resume - setup the RLC hw
5940  *
5941  * @rdev: radeon_device pointer
5942  *
5943  * Initialize the RLC registers, load the ucode,
5944  * and start the RLC (CIK).
5945  * Returns 0 for success, -EINVAL if the ucode is not available.
5946  */
cik_rlc_resume(struct radeon_device * rdev)5947 static int cik_rlc_resume(struct radeon_device *rdev)
5948 {
5949           u32 i, size, tmp;
5950 
5951           if (!rdev->rlc_fw)
5952                     return -EINVAL;
5953 
5954           cik_rlc_stop(rdev);
5955 
5956           /* disable CG */
5957           tmp = RREG32(RLC_CGCG_CGLS_CTRL) & 0xfffffffc;
5958           WREG32(RLC_CGCG_CGLS_CTRL, tmp);
5959 
5960           si_rlc_reset(rdev);
5961 
5962           cik_init_pg(rdev);
5963 
5964           cik_init_cg(rdev);
5965 
5966           WREG32(RLC_LB_CNTR_INIT, 0);
5967           WREG32(RLC_LB_CNTR_MAX, 0x00008000);
5968 
5969           mutex_lock(&rdev->grbm_idx_mutex);
5970           cik_select_se_sh(rdev, 0xffffffff, 0xffffffff);
5971           WREG32(RLC_LB_INIT_CU_MASK, 0xffffffff);
5972           WREG32(RLC_LB_PARAMS, 0x00600408);
5973           WREG32(RLC_LB_CNTL, 0x80000004);
5974           mutex_unlock(&rdev->grbm_idx_mutex);
5975 
5976           WREG32(RLC_MC_CNTL, 0);
5977           WREG32(RLC_UCODE_CNTL, 0);
5978 
5979           if (rdev->new_fw) {
5980                     const struct rlc_firmware_header_v1_0 *hdr =
5981                               (const struct rlc_firmware_header_v1_0 *)rdev->rlc_fw->data;
5982                     const __le32 *fw_data = (const __le32 *)
5983                               (rdev->rlc_fw->data + le32_to_cpu(hdr->header.ucode_array_offset_bytes));
5984 
5985                     radeon_ucode_print_rlc_hdr(&hdr->header);
5986 
5987                     size = le32_to_cpu(hdr->header.ucode_size_bytes) / 4;
5988                     WREG32(RLC_GPM_UCODE_ADDR, 0);
5989                     for (i = 0; i < size; i++)
5990                               WREG32(RLC_GPM_UCODE_DATA, le32_to_cpup(fw_data++));
5991                     WREG32(RLC_GPM_UCODE_ADDR, le32_to_cpu(hdr->header.ucode_version));
5992           } else {
5993                     const __be32 *fw_data;
5994 
5995                     switch (rdev->family) {
5996                     case CHIP_BONAIRE:
5997                     case CHIP_HAWAII:
5998                     default:
5999                               size = BONAIRE_RLC_UCODE_SIZE;
6000                               break;
6001                     case CHIP_KAVERI:
6002                               size = KV_RLC_UCODE_SIZE;
6003                               break;
6004                     case CHIP_KABINI:
6005                               size = KB_RLC_UCODE_SIZE;
6006                               break;
6007                     case CHIP_MULLINS:
6008                               size = ML_RLC_UCODE_SIZE;
6009                               break;
6010                     }
6011 
6012                     fw_data = (const __be32 *)rdev->rlc_fw->data;
6013                     WREG32(RLC_GPM_UCODE_ADDR, 0);
6014                     for (i = 0; i < size; i++)
6015                               WREG32(RLC_GPM_UCODE_DATA, be32_to_cpup(fw_data++));
6016                     WREG32(RLC_GPM_UCODE_ADDR, 0);
6017           }
6018 
6019           /* XXX - find out what chips support lbpw */
6020           cik_enable_lbpw(rdev, false);
6021 
6022           if (rdev->family == CHIP_BONAIRE)
6023                     WREG32(RLC_DRIVER_DMA_STATUS, 0);
6024 
6025           cik_rlc_start(rdev);
6026 
6027           return 0;
6028 }
6029 
/*
 * Enable or disable CGCG/CGLS in RLC_CGCG_CGLS_CTRL.
 *
 * Enabling only happens when @enable is set and the asic advertises
 * RADEON_CG_SUPPORT_GFX_CGCG; every other combination takes the disable
 * path.  The control register is written only if its value changes.
 */
static void cik_enable_cgcg(struct radeon_device *rdev, bool enable)
{
	const u32 orig = RREG32(RLC_CGCG_CGLS_CTRL);
	u32 updated;

	if (enable && (rdev->cg_flags & RADEON_CG_SUPPORT_GFX_CGCG)) {
		u32 rlc_cntl;

		cik_enable_gui_idle_interrupt(rdev, true);

		/* the serdes write is done with the RLC halted */
		rlc_cntl = cik_halt_rlc(rdev);

		mutex_lock(&rdev->grbm_idx_mutex);
		cik_select_se_sh(rdev, 0xffffffff, 0xffffffff);
		WREG32(RLC_SERDES_WR_CU_MASTER_MASK, 0xffffffff);
		WREG32(RLC_SERDES_WR_NONCU_MASTER_MASK, 0xffffffff);
		WREG32(RLC_SERDES_WR_CTRL,
		       BPM_ADDR_MASK | CGCG_OVERRIDE_0 | CGLS_ENABLE);
		mutex_unlock(&rdev->grbm_idx_mutex);

		cik_update_rlc(rdev, rlc_cntl);

		updated = orig | CGCG_EN | CGLS_EN;
	} else {
		int n;

		cik_enable_gui_idle_interrupt(rdev, false);

		/* four back-to-back reads; the values are discarded */
		for (n = 0; n < 4; n++)
			RREG32(CB_CGTT_SCLK_CTRL);

		updated = orig & ~(CGCG_EN | CGLS_EN);
	}

	if (updated != orig)
		WREG32(RLC_CGCG_CGLS_CTRL, updated);
}
6067 
cik_enable_mgcg(struct radeon_device * rdev,bool enable)6068 static void cik_enable_mgcg(struct radeon_device *rdev, bool enable)
6069 {
6070           u32 data, orig, tmp = 0;
6071 
6072           if (enable && (rdev->cg_flags & RADEON_CG_SUPPORT_GFX_MGCG)) {
6073                     if (rdev->cg_flags & RADEON_CG_SUPPORT_GFX_MGLS) {
6074                               if (rdev->cg_flags & RADEON_CG_SUPPORT_GFX_CP_LS) {
6075                                         orig = data = RREG32(CP_MEM_SLP_CNTL);
6076                                         data |= CP_MEM_LS_EN;
6077                                         if (orig != data)
6078                                                   WREG32(CP_MEM_SLP_CNTL, data);
6079                               }
6080                     }
6081 
6082                     orig = data = RREG32(RLC_CGTT_MGCG_OVERRIDE);
6083                     data |= 0x00000001;
6084                     data &= 0xfffffffd;
6085                     if (orig != data)
6086                               WREG32(RLC_CGTT_MGCG_OVERRIDE, data);
6087 
6088                     tmp = cik_halt_rlc(rdev);
6089 
6090                     mutex_lock(&rdev->grbm_idx_mutex);
6091                     cik_select_se_sh(rdev, 0xffffffff, 0xffffffff);
6092                     WREG32(RLC_SERDES_WR_CU_MASTER_MASK, 0xffffffff);
6093                     WREG32(RLC_SERDES_WR_NONCU_MASTER_MASK, 0xffffffff);
6094                     data = BPM_ADDR_MASK | MGCG_OVERRIDE_0;
6095                     WREG32(RLC_SERDES_WR_CTRL, data);
6096                     mutex_unlock(&rdev->grbm_idx_mutex);
6097 
6098                     cik_update_rlc(rdev, tmp);
6099 
6100                     if (rdev->cg_flags & RADEON_CG_SUPPORT_GFX_CGTS) {
6101                               orig = data = RREG32(CGTS_SM_CTRL_REG);
6102                               data &= ~SM_MODE_MASK;
6103                               data |= SM_MODE(0x2);
6104                               data |= SM_MODE_ENABLE;
6105                               data &= ~CGTS_OVERRIDE;
6106                               if ((rdev->cg_flags & RADEON_CG_SUPPORT_GFX_MGLS) &&
6107                                   (rdev->cg_flags & RADEON_CG_SUPPORT_GFX_CGTS_LS))
6108                                         data &= ~CGTS_LS_OVERRIDE;
6109                               data &= ~ON_MONITOR_ADD_MASK;
6110                               data |= ON_MONITOR_ADD_EN;
6111                               data |= ON_MONITOR_ADD(0x96);
6112                               if (orig != data)
6113                                         WREG32(CGTS_SM_CTRL_REG, data);
6114                     }
6115           } else {
6116                     orig = data = RREG32(RLC_CGTT_MGCG_OVERRIDE);
6117                     data |= 0x00000003;
6118                     if (orig != data)
6119                               WREG32(RLC_CGTT_MGCG_OVERRIDE, data);
6120 
6121                     data = RREG32(RLC_MEM_SLP_CNTL);
6122                     if (data & RLC_MEM_LS_EN) {
6123                               data &= ~RLC_MEM_LS_EN;
6124                               WREG32(RLC_MEM_SLP_CNTL, data);
6125                     }
6126 
6127                     data = RREG32(CP_MEM_SLP_CNTL);
6128                     if (data & CP_MEM_LS_EN) {
6129                               data &= ~CP_MEM_LS_EN;
6130                               WREG32(CP_MEM_SLP_CNTL, data);
6131                     }
6132 
6133                     orig = data = RREG32(CGTS_SM_CTRL_REG);
6134                     data |= CGTS_OVERRIDE | CGTS_LS_OVERRIDE;
6135                     if (orig != data)
6136                               WREG32(CGTS_SM_CTRL_REG, data);
6137 
6138                     tmp = cik_halt_rlc(rdev);
6139 
6140                     mutex_lock(&rdev->grbm_idx_mutex);
6141                     cik_select_se_sh(rdev, 0xffffffff, 0xffffffff);
6142                     WREG32(RLC_SERDES_WR_CU_MASTER_MASK, 0xffffffff);
6143                     WREG32(RLC_SERDES_WR_NONCU_MASTER_MASK, 0xffffffff);
6144                     data = BPM_ADDR_MASK | MGCG_OVERRIDE_1;
6145                     WREG32(RLC_SERDES_WR_CTRL, data);
6146                     mutex_unlock(&rdev->grbm_idx_mutex);
6147 
6148                     cik_update_rlc(rdev, tmp);
6149           }
6150 }
6151 
6152 static const u32 mc_cg_registers[] =
6153 {
6154           MC_HUB_MISC_HUB_CG,
6155           MC_HUB_MISC_SIP_CG,
6156           MC_HUB_MISC_VM_CG,
6157           MC_XPB_CLK_GAT,
6158           ATC_MISC_CG,
6159           MC_CITF_MISC_WR_CG,
6160           MC_CITF_MISC_RD_CG,
6161           MC_CITF_MISC_VM_CG,
6162           VM_L2_CG,
6163 };
6164 
/**
 * cik_enable_mc_ls - set or clear MC_LS_ENABLE in the MC clock-gating registers
 *
 * @rdev: radeon_device pointer
 * @enable: requested state
 *
 * For every register in mc_cg_registers, sets MC_LS_ENABLE when @enable is
 * true and RADEON_CG_SUPPORT_MC_LS is present in rdev->cg_flags, and clears
 * it otherwise.  A register is written only if its value actually changes.
 */
static void cik_enable_mc_ls(struct radeon_device *rdev,
			     bool enable)
{
	const bool ls_on = enable && (rdev->cg_flags & RADEON_CG_SUPPORT_MC_LS);
	int idx;

	for (idx = 0; idx < ARRAY_SIZE(mc_cg_registers); idx++) {
		const u32 reg = mc_cg_registers[idx];
		const u32 before = RREG32(reg);
		u32 after = before & ~MC_LS_ENABLE;

		if (ls_on)
			after |= MC_LS_ENABLE;
		if (after != before)
			WREG32(reg, after);
	}
}
6181 
/**
 * cik_enable_mc_mgcg - set or clear MC_CG_ENABLE in the MC clock-gating registers
 *
 * @rdev: radeon_device pointer
 * @enable: requested state
 *
 * For every register in mc_cg_registers, sets MC_CG_ENABLE when @enable is
 * true and RADEON_CG_SUPPORT_MC_MGCG is present in rdev->cg_flags, and
 * clears it otherwise.  A register is written only if its value changes.
 */
static void cik_enable_mc_mgcg(struct radeon_device *rdev,
			       bool enable)
{
	const bool cg_on = enable && (rdev->cg_flags & RADEON_CG_SUPPORT_MC_MGCG);
	int idx;

	for (idx = 0; idx < ARRAY_SIZE(mc_cg_registers); idx++) {
		const u32 reg = mc_cg_registers[idx];
		const u32 before = RREG32(reg);
		u32 after = before & ~MC_CG_ENABLE;

		if (cg_on)
			after |= MC_CG_ENABLE;
		if (after != before)
			WREG32(reg, after);
	}
}
6198 
/**
 * cik_enable_sdma_mgcg - program SDMA0_CLK_CTRL on both SDMA instances
 *
 * @rdev: radeon_device pointer
 * @enable: requested state
 *
 * When @enable is true and RADEON_CG_SUPPORT_SDMA_MGCG is set in
 * rdev->cg_flags, 0x00000100 is written unconditionally to the CLK_CTRL
 * register of each instance.  Otherwise the top byte (0xff000000) is OR-ed
 * into the current value and written back only if that changed it.
 * Instance 0 is always handled before instance 1.
 */
static void cik_enable_sdma_mgcg(struct radeon_device *rdev,
				 bool enable)
{
	static const u32 engine_offset[] = {
		SDMA0_REGISTER_OFFSET,
		SDMA1_REGISTER_OFFSET,
	};
	const bool gate = enable &&
		(rdev->cg_flags & RADEON_CG_SUPPORT_SDMA_MGCG);
	int n;

	for (n = 0; n < ARRAY_SIZE(engine_offset); n++) {
		const u32 reg = SDMA0_CLK_CTRL + engine_offset[n];
		u32 cur, next;

		if (gate) {
			WREG32(reg, 0x00000100);
			continue;
		}

		cur = RREG32(reg);
		next = cur | 0xff000000;
		if (next != cur)
			WREG32(reg, next);
	}
}
6219 
/**
 * cik_enable_sdma_mgls - set or clear bit 0x100 of SDMA0_POWER_CNTL
 *
 * @rdev: radeon_device pointer
 * @enable: requested state
 *
 * On each SDMA instance (0 first, then 1) bit 0x100 of the POWER_CNTL
 * register is set when @enable is true and RADEON_CG_SUPPORT_SDMA_LS is in
 * rdev->cg_flags, and cleared otherwise.  The register is written only when
 * the value changes.
 */
static void cik_enable_sdma_mgls(struct radeon_device *rdev,
				 bool enable)
{
	static const u32 engine_offset[] = {
		SDMA0_REGISTER_OFFSET,
		SDMA1_REGISTER_OFFSET,
	};
	const bool ls_on = enable &&
		(rdev->cg_flags & RADEON_CG_SUPPORT_SDMA_LS);
	int n;

	for (n = 0; n < ARRAY_SIZE(engine_offset); n++) {
		const u32 reg = SDMA0_POWER_CNTL + engine_offset[n];
		const u32 cur = RREG32(reg);
		u32 next;

		if (ls_on)
			next = cur | 0x100;
		else
			next = cur & ~0x100;

		if (next != cur)
			WREG32(reg, next);
	}
}
6247 
/**
 * cik_enable_uvd_mgcg - toggle the UVD clock-gating bits
 *
 * @rdev: radeon_device pointer
 * @enable: requested state
 *
 * When @enable is true and RADEON_CG_SUPPORT_UVD_MGCG is set in
 * rdev->cg_flags, the low 12 bits of UVD_CGC_MEM_CTRL are set and DCM is
 * set in UVD_CGC_CTRL.  Otherwise those same bits are cleared.
 * UVD_CGC_MEM_CTRL is always written back; UVD_CGC_CTRL is written only
 * when its value changes.
 */
static void cik_enable_uvd_mgcg(struct radeon_device *rdev,
				bool enable)
{
	u32 orig, data;

	if (enable && (rdev->cg_flags & RADEON_CG_SUPPORT_UVD_MGCG)) {
		data = RREG32_UVD_CTX(UVD_CGC_MEM_CTRL);
		/*
		 * Read-modify-write: the value just read used to be thrown
		 * away by a plain assignment, which zeroed every bit above
		 * 0xfff.  OR the bits in so the rest of the register is
		 * preserved, mirroring the "&= ~0xfff" of the disable path.
		 */
		data |= 0xfff;
		WREG32_UVD_CTX(UVD_CGC_MEM_CTRL, data);

		orig = data = RREG32(UVD_CGC_CTRL);
		data |= DCM;
		if (orig != data)
			WREG32(UVD_CGC_CTRL, data);
	} else {
		data = RREG32_UVD_CTX(UVD_CGC_MEM_CTRL);
		data &= ~0xfff;
		WREG32_UVD_CTX(UVD_CGC_MEM_CTRL, data);

		orig = data = RREG32(UVD_CGC_CTRL);
		data &= ~DCM;
		if (orig != data)
			WREG32(UVD_CGC_CTRL, data);
	}
}
6273 
/**
 * cik_enable_bif_mgls - set or clear the *_MEM_LS_EN bits in PCIE_CNTL2
 *
 * @rdev: radeon_device pointer
 * @enable: requested state
 *
 * Sets SLV_MEM_LS_EN, MST_MEM_LS_EN, REPLAY_MEM_LS_EN and
 * SLV_MEM_AGGRESSIVE_LS_EN when @enable is true and
 * RADEON_CG_SUPPORT_BIF_LS is in rdev->cg_flags; clears them otherwise.
 * The register is written only when the value changes.
 */
static void cik_enable_bif_mgls(struct radeon_device *rdev,
				bool enable)
{
	const u32 ls_bits = SLV_MEM_LS_EN | MST_MEM_LS_EN |
			    REPLAY_MEM_LS_EN | SLV_MEM_AGGRESSIVE_LS_EN;
	const u32 before = RREG32_PCIE_PORT(PCIE_CNTL2);
	u32 after;

	if (enable && (rdev->cg_flags & RADEON_CG_SUPPORT_BIF_LS))
		after = before | ls_bits;
	else
		after = before & ~ls_bits;

	if (after != before)
		WREG32_PCIE_PORT(PCIE_CNTL2, after);
}
6291 
/**
 * cik_enable_hdp_mgcg - clear or set CLOCK_GATING_DIS in HDP_HOST_PATH_CNTL
 *
 * @rdev: radeon_device pointer
 * @enable: requested state
 *
 * CLOCK_GATING_DIS is a disable bit: it is cleared when @enable is true and
 * RADEON_CG_SUPPORT_HDP_MGCG is in rdev->cg_flags, and set otherwise.  The
 * register is written only when the value changes.
 */
static void cik_enable_hdp_mgcg(struct radeon_device *rdev,
				bool enable)
{
	const u32 before = RREG32(HDP_HOST_PATH_CNTL);
	u32 after;

	if (enable && (rdev->cg_flags & RADEON_CG_SUPPORT_HDP_MGCG))
		after = before & ~CLOCK_GATING_DIS;
	else
		after = before | CLOCK_GATING_DIS;

	if (after != before)
		WREG32(HDP_HOST_PATH_CNTL, after);
}
6307 
/**
 * cik_enable_hdp_ls - set or clear HDP_LS_ENABLE in HDP_MEM_POWER_LS
 *
 * @rdev: radeon_device pointer
 * @enable: requested state
 *
 * HDP_LS_ENABLE is set when @enable is true and RADEON_CG_SUPPORT_HDP_LS is
 * in rdev->cg_flags, and cleared otherwise.  The register is written only
 * when the value changes.
 */
static void cik_enable_hdp_ls(struct radeon_device *rdev,
			      bool enable)
{
	const u32 before = RREG32(HDP_MEM_POWER_LS);
	u32 after;

	if (enable && (rdev->cg_flags & RADEON_CG_SUPPORT_HDP_LS))
		after = before | HDP_LS_ENABLE;
	else
		after = before & ~HDP_LS_ENABLE;

	if (after != before)
		WREG32(HDP_MEM_POWER_LS, after);
}
6323 
/**
 * cik_update_cg - enable or disable clock gating for the selected blocks
 *
 * @rdev: radeon_device pointer
 * @block: bitmask of RADEON_CG_BLOCK_* values selecting what to touch
 * @enable: requested state
 *
 * Dispatches to the per-block helpers for every block selected in @block.
 * The GFX sequence is bracketed by turning the GUI idle interrupt off and
 * back on.  MC is skipped when RADEON_IS_IGP is set in rdev->flags and UVD
 * is skipped when rdev->has_uvd is false.
 */
void cik_update_cg(struct radeon_device *rdev,
		   u32 block, bool enable)
{
	if (block & RADEON_CG_BLOCK_GFX) {
		cik_enable_gui_idle_interrupt(rdev, false);
		/* order matters! mgcg before cgcg on enable, reverse on disable */
		if (enable) {
			cik_enable_mgcg(rdev, true);
			cik_enable_cgcg(rdev, true);
		} else {
			cik_enable_cgcg(rdev, false);
			cik_enable_mgcg(rdev, false);
		}
		cik_enable_gui_idle_interrupt(rdev, true);
	}

	if ((block & RADEON_CG_BLOCK_MC) && !(rdev->flags & RADEON_IS_IGP)) {
		cik_enable_mc_mgcg(rdev, enable);
		cik_enable_mc_ls(rdev, enable);
	}

	if (block & RADEON_CG_BLOCK_SDMA) {
		cik_enable_sdma_mgcg(rdev, enable);
		cik_enable_sdma_mgls(rdev, enable);
	}

	if (block & RADEON_CG_BLOCK_BIF)
		cik_enable_bif_mgls(rdev, enable);

	if ((block & RADEON_CG_BLOCK_UVD) && rdev->has_uvd)
		cik_enable_uvd_mgcg(rdev, enable);

	if (block & RADEON_CG_BLOCK_HDP) {
		cik_enable_hdp_mgcg(rdev, enable);
		cik_enable_hdp_ls(rdev, enable);
	}

	if (block & RADEON_CG_BLOCK_VCE)
		vce_v2_0_enable_mgcg(rdev, enable);
}
6371 
cik_init_cg(struct radeon_device * rdev)6372 static void cik_init_cg(struct radeon_device *rdev)
6373 {
6374 
6375           cik_update_cg(rdev, RADEON_CG_BLOCK_GFX, true);
6376 
6377           if (rdev->has_uvd)
6378                     si_init_uvd_internal_cg(rdev);
6379 
6380           cik_update_cg(rdev, (RADEON_CG_BLOCK_MC |
6381                                    RADEON_CG_BLOCK_SDMA |
6382                                    RADEON_CG_BLOCK_BIF |
6383                                    RADEON_CG_BLOCK_UVD |
6384                                    RADEON_CG_BLOCK_HDP), true);
6385 }
6386 
cik_fini_cg(struct radeon_device * rdev)6387 static void cik_fini_cg(struct radeon_device *rdev)
6388 {
6389           cik_update_cg(rdev, (RADEON_CG_BLOCK_MC |
6390                                    RADEON_CG_BLOCK_SDMA |
6391                                    RADEON_CG_BLOCK_BIF |
6392                                    RADEON_CG_BLOCK_UVD |
6393                                    RADEON_CG_BLOCK_HDP), false);
6394 
6395           cik_update_cg(rdev, RADEON_CG_BLOCK_GFX, false);
6396 }
6397 
/**
 * cik_enable_sck_slowdown_on_pu - toggle SMU_CLK_SLOWDOWN_ON_PU_ENABLE
 *
 * @rdev: radeon_device pointer
 * @enable: requested state
 *
 * Sets the bit in RLC_PG_CNTL when @enable is true and
 * RADEON_PG_SUPPORT_RLC_SMU_HS is in rdev->pg_flags; clears it otherwise.
 * The register is written only when the value changes.
 */
static void cik_enable_sck_slowdown_on_pu(struct radeon_device *rdev,
					  bool enable)
{
	const u32 before = RREG32(RLC_PG_CNTL);
	u32 after;

	if (enable && (rdev->pg_flags & RADEON_PG_SUPPORT_RLC_SMU_HS))
		after = before | SMU_CLK_SLOWDOWN_ON_PU_ENABLE;
	else
		after = before & ~SMU_CLK_SLOWDOWN_ON_PU_ENABLE;

	if (after != before)
		WREG32(RLC_PG_CNTL, after);
}
6411 
/**
 * cik_enable_sck_slowdown_on_pd - toggle SMU_CLK_SLOWDOWN_ON_PD_ENABLE
 *
 * @rdev: radeon_device pointer
 * @enable: requested state
 *
 * Sets the bit in RLC_PG_CNTL when @enable is true and
 * RADEON_PG_SUPPORT_RLC_SMU_HS is in rdev->pg_flags; clears it otherwise.
 * The register is written only when the value changes.
 */
static void cik_enable_sck_slowdown_on_pd(struct radeon_device *rdev,
					  bool enable)
{
	const u32 before = RREG32(RLC_PG_CNTL);
	u32 after;

	if (enable && (rdev->pg_flags & RADEON_PG_SUPPORT_RLC_SMU_HS))
		after = before | SMU_CLK_SLOWDOWN_ON_PD_ENABLE;
	else
		after = before & ~SMU_CLK_SLOWDOWN_ON_PD_ENABLE;

	if (after != before)
		WREG32(RLC_PG_CNTL, after);
}
6425 
/**
 * cik_enable_cp_pg - clear or set DISABLE_CP_PG in RLC_PG_CNTL
 *
 * @rdev: radeon_device pointer
 * @enable: requested state
 *
 * DISABLE_CP_PG is a disable bit: it is cleared when @enable is true and
 * RADEON_PG_SUPPORT_CP is in rdev->pg_flags, and set otherwise.  The
 * register is written only when the value changes.
 */
static void cik_enable_cp_pg(struct radeon_device *rdev, bool enable)
{
	const u32 before = RREG32(RLC_PG_CNTL);
	u32 after;

	if (enable && (rdev->pg_flags & RADEON_PG_SUPPORT_CP))
		after = before & ~DISABLE_CP_PG;
	else
		after = before | DISABLE_CP_PG;

	if (after != before)
		WREG32(RLC_PG_CNTL, after);
}
6438 
/**
 * cik_enable_gds_pg - clear or set DISABLE_GDS_PG in RLC_PG_CNTL
 *
 * @rdev: radeon_device pointer
 * @enable: requested state
 *
 * DISABLE_GDS_PG is a disable bit: it is cleared when @enable is true and
 * RADEON_PG_SUPPORT_GDS is in rdev->pg_flags, and set otherwise.  The
 * register is written only when the value changes.
 */
static void cik_enable_gds_pg(struct radeon_device *rdev, bool enable)
{
	const u32 before = RREG32(RLC_PG_CNTL);
	u32 after;

	if (enable && (rdev->pg_flags & RADEON_PG_SUPPORT_GDS))
		after = before & ~DISABLE_GDS_PG;
	else
		after = before | DISABLE_GDS_PG;

	if (after != before)
		WREG32(RLC_PG_CNTL, after);
}
6451 
6452 #define CP_ME_TABLE_SIZE    96
6453 #define CP_ME_TABLE_OFFSET  2048
6454 #define CP_MEC_TABLE_OFFSET 4096
6455 
cik_init_cp_pg_table(struct radeon_device * rdev)6456 void cik_init_cp_pg_table(struct radeon_device *rdev)
6457 {
6458           volatile u32 *dst_ptr;
6459           int me, i, max_me = 4;
6460           u32 bo_offset = 0;
6461           u32 table_offset, table_size;
6462 
6463           if (rdev->family == CHIP_KAVERI)
6464                     max_me = 5;
6465 
6466           if (rdev->rlc.cp_table_ptr == NULL)
6467                     return;
6468 
6469           /* write the cp table buffer */
6470           dst_ptr = rdev->rlc.cp_table_ptr;
6471           for (me = 0; me < max_me; me++) {
6472                     if (rdev->new_fw) {
6473                               const __le32 *fw_data;
6474                               const struct gfx_firmware_header_v1_0 *hdr;
6475 
6476                               if (me == 0) {
6477                                         hdr = (const struct gfx_firmware_header_v1_0 *)rdev->ce_fw->data;
6478                                         fw_data = (const __le32 *)
6479                                                   (rdev->ce_fw->data + le32_to_cpu(hdr->header.ucode_array_offset_bytes));
6480                                         table_offset = le32_to_cpu(hdr->jt_offset);
6481                                         table_size = le32_to_cpu(hdr->jt_size);
6482                               } else if (me == 1) {
6483                                         hdr = (const struct gfx_firmware_header_v1_0 *)rdev->pfp_fw->data;
6484                                         fw_data = (const __le32 *)
6485                                                   (rdev->pfp_fw->data + le32_to_cpu(hdr->header.ucode_array_offset_bytes));
6486                                         table_offset = le32_to_cpu(hdr->jt_offset);
6487                                         table_size = le32_to_cpu(hdr->jt_size);
6488                               } else if (me == 2) {
6489                                         hdr = (const struct gfx_firmware_header_v1_0 *)rdev->me_fw->data;
6490                                         fw_data = (const __le32 *)
6491                                                   (rdev->me_fw->data + le32_to_cpu(hdr->header.ucode_array_offset_bytes));
6492                                         table_offset = le32_to_cpu(hdr->jt_offset);
6493                                         table_size = le32_to_cpu(hdr->jt_size);
6494                               } else if (me == 3) {
6495                                         hdr = (const struct gfx_firmware_header_v1_0 *)rdev->mec_fw->data;
6496                                         fw_data = (const __le32 *)
6497                                                   (rdev->mec_fw->data + le32_to_cpu(hdr->header.ucode_array_offset_bytes));
6498                                         table_offset = le32_to_cpu(hdr->jt_offset);
6499                                         table_size = le32_to_cpu(hdr->jt_size);
6500                               } else {
6501                                         hdr = (const struct gfx_firmware_header_v1_0 *)rdev->mec2_fw->data;
6502                                         fw_data = (const __le32 *)
6503                                                   (rdev->mec2_fw->data + le32_to_cpu(hdr->header.ucode_array_offset_bytes));
6504                                         table_offset = le32_to_cpu(hdr->jt_offset);
6505                                         table_size = le32_to_cpu(hdr->jt_size);
6506                               }
6507 
6508                               for (i = 0; i < table_size; i ++) {
6509                                         dst_ptr[bo_offset + i] =
6510                                                   cpu_to_le32(le32_to_cpu(fw_data[table_offset + i]));
6511                               }
6512                               bo_offset += table_size;
6513                     } else {
6514                               const __be32 *fw_data;
6515                               table_size = CP_ME_TABLE_SIZE;
6516 
6517                               if (me == 0) {
6518                                         fw_data = (const __be32 *)rdev->ce_fw->data;
6519                                         table_offset = CP_ME_TABLE_OFFSET;
6520                               } else if (me == 1) {
6521                                         fw_data = (const __be32 *)rdev->pfp_fw->data;
6522                                         table_offset = CP_ME_TABLE_OFFSET;
6523                               } else if (me == 2) {
6524                                         fw_data = (const __be32 *)rdev->me_fw->data;
6525                                         table_offset = CP_ME_TABLE_OFFSET;
6526                               } else {
6527                                         fw_data = (const __be32 *)rdev->mec_fw->data;
6528                                         table_offset = CP_MEC_TABLE_OFFSET;
6529                               }
6530 
6531                               for (i = 0; i < table_size; i ++) {
6532                                         dst_ptr[bo_offset + i] =
6533                                                   cpu_to_le32(be32_to_cpu(fw_data[table_offset + i]));
6534                               }
6535                               bo_offset += table_size;
6536                     }
6537           }
6538 }
6539 
/**
 * cik_enable_gfx_cgpg - toggle GFX_PG_ENABLE and AUTO_PG_EN
 *
 * @rdev: radeon_device pointer
 * @enable: requested state
 *
 * When @enable is true and RADEON_PG_SUPPORT_GFX_PG is in rdev->pg_flags,
 * sets GFX_PG_ENABLE in RLC_PG_CNTL and AUTO_PG_EN in RLC_AUTO_PG_CTRL.
 * Otherwise clears both and finishes with a read of DB_RENDER_CONTROL whose
 * result is discarded.  Registers are written only when the value changes.
 */
static void cik_enable_gfx_cgpg(struct radeon_device *rdev,
				bool enable)
{
	const bool pg_on = enable &&
		(rdev->pg_flags & RADEON_PG_SUPPORT_GFX_PG);
	u32 before, after;

	before = RREG32(RLC_PG_CNTL);
	after = pg_on ? (before | GFX_PG_ENABLE) : (before & ~GFX_PG_ENABLE);
	if (after != before)
		WREG32(RLC_PG_CNTL, after);

	before = RREG32(RLC_AUTO_PG_CTRL);
	after = pg_on ? (before | AUTO_PG_EN) : (before & ~AUTO_PG_EN);
	if (after != before)
		WREG32(RLC_AUTO_PG_CTRL, after);

	/*
	 * NOTE(review): the value read here is never used; presumably the
	 * register access itself is what matters on the disable path -
	 * confirm before removing.
	 */
	if (!pg_on)
		(void)RREG32(DB_RENDER_CONTROL);
}
6569 
/**
 * cik_get_cu_active_bitmap - compute the active-CU bitmap of one SE/SH
 *
 * @rdev: radeon_device pointer
 * @se: shader engine index passed to cik_select_se_sh()
 * @sh: shader array index passed to cik_select_se_sh()
 *
 * Under grbm_idx_mutex, selects @se/@sh, reads CC_GC_SHADER_ARRAY_CONFIG
 * and GC_USER_SHADER_ARRAY_CONFIG, then restores the 0xffffffff/0xffffffff
 * selection.  The upper 16 bits of the first register are OR-ed with the
 * second register and shifted down by 16; the result is inverted and masked
 * to rdev->config.cik.max_cu_per_sh bits.
 *
 * Returns a bitmap with one bit per CU that is not flagged in either register.
 */
static u32 cik_get_cu_active_bitmap(struct radeon_device *rdev, u32 se, u32 sh)
{
	u32 cc_cfg, user_cfg, inactive;
	u32 valid = 0;
	u32 n;

	mutex_lock(&rdev->grbm_idx_mutex);
	cik_select_se_sh(rdev, se, sh);
	cc_cfg = RREG32(CC_GC_SHADER_ARRAY_CONFIG);
	user_cfg = RREG32(GC_USER_SHADER_ARRAY_CONFIG);
	cik_select_se_sh(rdev, 0xffffffff, 0xffffffff);
	mutex_unlock(&rdev->grbm_idx_mutex);

	inactive = ((cc_cfg & 0xffff0000) | user_cfg) >> 16;

	/* one low bit per possible CU; built by shifting to stay safe at 32 */
	for (n = 0; n < rdev->config.cik.max_cu_per_sh; n++)
		valid = (valid << 1) | 1;

	return ~inactive & valid;
}
6594 
cik_init_ao_cu_mask(struct radeon_device * rdev)6595 static void cik_init_ao_cu_mask(struct radeon_device *rdev)
6596 {
6597           u32 i, j, k, active_cu_number = 0;
6598           u32 mask, counter, cu_bitmap;
6599           u32 tmp = 0;
6600 
6601           for (i = 0; i < rdev->config.cik.max_shader_engines; i++) {
6602                     for (j = 0; j < rdev->config.cik.max_sh_per_se; j++) {
6603                               mask = 1;
6604                               cu_bitmap = 0;
6605                               counter = 0;
6606                               for (k = 0; k < rdev->config.cik.max_cu_per_sh; k ++) {
6607                                         if (cik_get_cu_active_bitmap(rdev, i, j) & mask) {
6608                                                   if (counter < 2)
6609                                                             cu_bitmap |= mask;
6610                                                   counter ++;
6611                                         }
6612                                         mask <<= 1;
6613                               }
6614 
6615                               active_cu_number += counter;
6616                               tmp |= (cu_bitmap << (i * 16 + j * 8));
6617                     }
6618           }
6619 
6620           WREG32(RLC_PG_AO_CU_MASK, tmp);
6621 
6622           tmp = RREG32(RLC_MAX_PG_CU);
6623           tmp &= ~MAX_PU_CU_MASK;
6624           tmp |= MAX_PU_CU(active_cu_number);
6625           WREG32(RLC_MAX_PG_CU, tmp);
6626 }
6627 
/**
 * cik_enable_gfx_static_mgpg - toggle STATIC_PER_CU_PG_ENABLE in RLC_PG_CNTL
 *
 * @rdev: radeon_device pointer
 * @enable: requested state
 *
 * The bit is set when @enable is true and RADEON_PG_SUPPORT_GFX_SMG is in
 * rdev->pg_flags, and cleared otherwise.  The register is written only when
 * the value changes.
 */
static void cik_enable_gfx_static_mgpg(struct radeon_device *rdev,
				       bool enable)
{
	const u32 before = RREG32(RLC_PG_CNTL);
	u32 after;

	if (enable && (rdev->pg_flags & RADEON_PG_SUPPORT_GFX_SMG))
		after = before | STATIC_PER_CU_PG_ENABLE;
	else
		after = before & ~STATIC_PER_CU_PG_ENABLE;

	if (after != before)
		WREG32(RLC_PG_CNTL, after);
}
6641 
/**
 * cik_enable_gfx_dynamic_mgpg - toggle DYN_PER_CU_PG_ENABLE in RLC_PG_CNTL
 *
 * @rdev: radeon_device pointer
 * @enable: requested state
 *
 * The bit is set when @enable is true and RADEON_PG_SUPPORT_GFX_DMG is in
 * rdev->pg_flags, and cleared otherwise.  The register is written only when
 * the value changes.
 */
static void cik_enable_gfx_dynamic_mgpg(struct radeon_device *rdev,
					bool enable)
{
	const u32 before = RREG32(RLC_PG_CNTL);
	u32 after;

	if (enable && (rdev->pg_flags & RADEON_PG_SUPPORT_GFX_DMG))
		after = before | DYN_PER_CU_PG_ENABLE;
	else
		after = before & ~DYN_PER_CU_PG_ENABLE;

	if (after != before)
		WREG32(RLC_PG_CNTL, after);
}
6655 
6656 #define RLC_SAVE_AND_RESTORE_STARTING_OFFSET 0x90
6657 #define RLC_CLEAR_STATE_DESCRIPTOR_OFFSET    0x3D
6658 
cik_init_gfx_cgpg(struct radeon_device * rdev)6659 static void cik_init_gfx_cgpg(struct radeon_device *rdev)
6660 {
6661           u32 data, orig;
6662           u32 i;
6663 
6664           if (rdev->rlc.cs_data) {
6665                     WREG32(RLC_GPM_SCRATCH_ADDR, RLC_CLEAR_STATE_DESCRIPTOR_OFFSET);
6666                     WREG32(RLC_GPM_SCRATCH_DATA, upper_32_bits(rdev->rlc.clear_state_gpu_addr));
6667                     WREG32(RLC_GPM_SCRATCH_DATA, lower_32_bits(rdev->rlc.clear_state_gpu_addr));
6668                     WREG32(RLC_GPM_SCRATCH_DATA, rdev->rlc.clear_state_size);
6669           } else {
6670                     WREG32(RLC_GPM_SCRATCH_ADDR, RLC_CLEAR_STATE_DESCRIPTOR_OFFSET);
6671                     for (i = 0; i < 3; i++)
6672                               WREG32(RLC_GPM_SCRATCH_DATA, 0);
6673           }
6674           if (rdev->rlc.reg_list) {
6675                     WREG32(RLC_GPM_SCRATCH_ADDR, RLC_SAVE_AND_RESTORE_STARTING_OFFSET);
6676                     for (i = 0; i < rdev->rlc.reg_list_size; i++)
6677                               WREG32(RLC_GPM_SCRATCH_DATA, rdev->rlc.reg_list[i]);
6678           }
6679 
6680           orig = data = RREG32(RLC_PG_CNTL);
6681           data |= GFX_PG_SRC;
6682           if (orig != data)
6683                     WREG32(RLC_PG_CNTL, data);
6684 
6685           WREG32(RLC_SAVE_AND_RESTORE_BASE, rdev->rlc.save_restore_gpu_addr >> 8);
6686           WREG32(RLC_CP_TABLE_RESTORE, rdev->rlc.cp_table_gpu_addr >> 8);
6687 
6688           data = RREG32(CP_RB_WPTR_POLL_CNTL);
6689           data &= ~IDLE_POLL_COUNT_MASK;
6690           data |= IDLE_POLL_COUNT(0x60);
6691           WREG32(CP_RB_WPTR_POLL_CNTL, data);
6692 
6693           data = 0x10101010;
6694           WREG32(RLC_PG_DELAY, data);
6695 
6696           data = RREG32(RLC_PG_DELAY_2);
6697           data &= ~0xff;
6698           data |= 0x3;
6699           WREG32(RLC_PG_DELAY_2, data);
6700 
6701           data = RREG32(RLC_AUTO_PG_CTRL);
6702           data &= ~GRBM_REG_SGIT_MASK;
6703           data |= GRBM_REG_SGIT(0x700);
6704           WREG32(RLC_AUTO_PG_CTRL, data);
6705 
6706 }
6707 
/**
 * cik_update_gfx_pg - apply one enable state to all GFX power-gating modes
 *
 * @rdev: radeon_device pointer
 * @enable: requested state
 *
 * Forwards @enable, in order, to the cgpg, static mgpg and dynamic mgpg
 * helpers; each helper checks its own rdev->pg_flags support bit.
 */
static void cik_update_gfx_pg(struct radeon_device *rdev, bool enable)
{
	/* coarse-grain first, then the two per-CU modes */
	cik_enable_gfx_cgpg(rdev, enable);

	cik_enable_gfx_static_mgpg(rdev, enable);

	cik_enable_gfx_dynamic_mgpg(rdev, enable);
}
6714 
cik_get_csb_size(struct radeon_device * rdev)6715 u32 cik_get_csb_size(struct radeon_device *rdev)
6716 {
6717           u32 count = 0;
6718           const struct cs_section_def *sect = NULL;
6719           const struct cs_extent_def *ext = NULL;
6720 
6721           if (rdev->rlc.cs_data == NULL)
6722                     return 0;
6723 
6724           /* begin clear state */
6725           count += 2;
6726           /* context control state */
6727           count += 3;
6728 
6729           for (sect = rdev->rlc.cs_data; sect->section != NULL; ++sect) {
6730                     for (ext = sect->section; ext->extent != NULL; ++ext) {
6731                               if (sect->id == SECT_CONTEXT)
6732                                         count += 2 + ext->reg_count;
6733                               else
6734                                         return 0;
6735                     }
6736           }
6737           /* pa_sc_raster_config/pa_sc_raster_config1 */
6738           count += 4;
6739           /* end clear state */
6740           count += 2;
6741           /* clear state */
6742           count += 2;
6743 
6744           return count;
6745 }
6746 
cik_get_csb_buffer(struct radeon_device * rdev,volatile u32 * buffer)6747 void cik_get_csb_buffer(struct radeon_device *rdev, volatile u32 *buffer)
6748 {
6749           u32 count = 0, i;
6750           const struct cs_section_def *sect = NULL;
6751           const struct cs_extent_def *ext = NULL;
6752 
6753           if (rdev->rlc.cs_data == NULL)
6754                     return;
6755           if (buffer == NULL)
6756                     return;
6757 
6758           buffer[count++] = cpu_to_le32(PACKET3(PACKET3_PREAMBLE_CNTL, 0));
6759           buffer[count++] = cpu_to_le32(PACKET3_PREAMBLE_BEGIN_CLEAR_STATE);
6760 
6761           buffer[count++] = cpu_to_le32(PACKET3(PACKET3_CONTEXT_CONTROL, 1));
6762           buffer[count++] = cpu_to_le32(0x80000000);
6763           buffer[count++] = cpu_to_le32(0x80000000);
6764 
6765           for (sect = rdev->rlc.cs_data; sect->section != NULL; ++sect) {
6766                     for (ext = sect->section; ext->extent != NULL; ++ext) {
6767                               if (sect->id == SECT_CONTEXT) {
6768                                         buffer[count++] =
6769                                                   cpu_to_le32(PACKET3(PACKET3_SET_CONTEXT_REG, ext->reg_count));
6770                                         buffer[count++] = cpu_to_le32(ext->reg_index - 0xa000);
6771                                         for (i = 0; i < ext->reg_count; i++)
6772                                                   buffer[count++] = cpu_to_le32(ext->extent[i]);
6773                               } else {
6774                                         return;
6775                               }
6776                     }
6777           }
6778 
6779           buffer[count++] = cpu_to_le32(PACKET3(PACKET3_SET_CONTEXT_REG, 2));
6780           buffer[count++] = cpu_to_le32(PA_SC_RASTER_CONFIG - PACKET3_SET_CONTEXT_REG_START);
6781           switch (rdev->family) {
6782           case CHIP_BONAIRE:
6783                     buffer[count++] = cpu_to_le32(0x16000012);
6784                     buffer[count++] = cpu_to_le32(0x00000000);
6785                     break;
6786           case CHIP_KAVERI:
6787                     buffer[count++] = cpu_to_le32(0x00000000); /* XXX */
6788                     buffer[count++] = cpu_to_le32(0x00000000);
6789                     break;
6790           case CHIP_KABINI:
6791           case CHIP_MULLINS:
6792                     buffer[count++] = cpu_to_le32(0x00000000); /* XXX */
6793                     buffer[count++] = cpu_to_le32(0x00000000);
6794                     break;
6795           case CHIP_HAWAII:
6796                     buffer[count++] = cpu_to_le32(0x3a00161a);
6797                     buffer[count++] = cpu_to_le32(0x0000002e);
6798                     break;
6799           default:
6800                     buffer[count++] = cpu_to_le32(0x00000000);
6801                     buffer[count++] = cpu_to_le32(0x00000000);
6802                     break;
6803           }
6804 
6805           buffer[count++] = cpu_to_le32(PACKET3(PACKET3_PREAMBLE_CNTL, 0));
6806           buffer[count++] = cpu_to_le32(PACKET3_PREAMBLE_END_CLEAR_STATE);
6807 
6808           buffer[count++] = cpu_to_le32(PACKET3(PACKET3_CLEAR_STATE, 0));
6809           buffer[count++] = cpu_to_le32(0);
6810 }
6811 
/**
 * cik_init_pg - set up power gating
 *
 * @rdev: radeon_device pointer
 *
 * Does nothing unless at least one bit is set in rdev->pg_flags.
 * Otherwise calls both SCK slowdown helpers with enable = true, runs
 * the gfx-specific setup (cik_init_gfx_cgpg() followed by the CP and
 * GDS power gating helpers with enable = true) only when
 * RADEON_PG_SUPPORT_GFX_PG is set, then calls cik_init_ao_cu_mask()
 * and cik_update_gfx_pg() with enable = true.
 */
cik_init_pg(struct radeon_device * rdev)6812 static void cik_init_pg(struct radeon_device *rdev)
6813 {
6814           if (rdev->pg_flags) {
6815                     cik_enable_sck_slowdown_on_pu(rdev, true);
6816                     cik_enable_sck_slowdown_on_pd(rdev, true);
6817                     if (rdev->pg_flags & RADEON_PG_SUPPORT_GFX_PG) {
6818                               cik_init_gfx_cgpg(rdev);
6819                               cik_enable_cp_pg(rdev, true);
6820                               cik_enable_gds_pg(rdev, true);
6821                     }
6822                     cik_init_ao_cu_mask(rdev);
6823                     cik_update_gfx_pg(rdev, true);
6824           }
6825 }
6826 
/**
 * cik_fini_pg - tear down power gating
 *
 * @rdev: radeon_device pointer
 *
 * Does nothing unless at least one bit is set in rdev->pg_flags.
 * Otherwise calls cik_update_gfx_pg() with enable = false first and,
 * when RADEON_PG_SUPPORT_GFX_PG is set, calls the CP and GDS power
 * gating helpers with enable = false.
 */
cik_fini_pg(struct radeon_device * rdev)6827 static void cik_fini_pg(struct radeon_device *rdev)
6828 {
6829           if (rdev->pg_flags) {
6830                     cik_update_gfx_pg(rdev, false);
6831                     if (rdev->pg_flags & RADEON_PG_SUPPORT_GFX_PG) {
6832                               cik_enable_cp_pg(rdev, false);
6833                               cik_enable_gds_pg(rdev, false);
6834                     }
6835           }
6836 }
6837 
6838 /*
6839  * Interrupts
6840  * Starting with r6xx, interrupts are handled via a ring buffer.
6841  * Ring buffers are areas of GPU accessible memory that the GPU
6842  * writes interrupt vectors into and the host reads vectors out of.
6843  * There is a rptr (read pointer) that determines where the
6844  * host is currently reading, and a wptr (write pointer)
6845  * which determines where the GPU has written.  When the
6846  * pointers are equal, the ring is idle.  When the GPU
6847  * writes vectors to the ring buffer, it increments the
6848  * wptr.  When there is an interrupt, the host then starts
6849  * fetching commands and processing them until the pointers are
6850  * equal again at which point it updates the rptr.
6851  */
6852 
6853 /**
6854  * cik_enable_interrupts - Enable the interrupt ring buffer
6855  *
6856  * @rdev: radeon_device pointer
6857  *
6858  * Enable the interrupt ring buffer (CIK).
 * Both registers are updated read-modify-write: ENABLE_INTR is set in
 * IH_CNTL and IH_RB_ENABLE is set in IH_RB_CNTL, leaving the other
 * bits as read.  rdev->ih.enabled is set to true afterwards.
6859  */
cik_enable_interrupts(struct radeon_device * rdev)6860 static void cik_enable_interrupts(struct radeon_device *rdev)
6861 {
6862           u32 ih_cntl = RREG32(IH_CNTL);
6863           u32 ih_rb_cntl = RREG32(IH_RB_CNTL);
6864 
6865           ih_cntl |= ENABLE_INTR;
6866           ih_rb_cntl |= IH_RB_ENABLE;
               /* IH_CNTL is written before IH_RB_CNTL */
6867           WREG32(IH_CNTL, ih_cntl);
6868           WREG32(IH_RB_CNTL, ih_rb_cntl);
6869           rdev->ih.enabled = true;
6870 }
6871 
6872 /**
6873  * cik_disable_interrupts - Disable the interrupt ring buffer
6874  *
6875  * @rdev: radeon_device pointer
6876  *
6877  * Disable the interrupt ring buffer (CIK).
 * Clears IH_RB_ENABLE in IH_RB_CNTL and ENABLE_INTR in IH_CNTL
 * (read-modify-write), zeroes the hardware read and write pointers,
 * and resets the driver state (rdev->ih.enabled = false,
 * rdev->ih.rptr = 0).
6878  */
cik_disable_interrupts(struct radeon_device * rdev)6879 static void cik_disable_interrupts(struct radeon_device *rdev)
6880 {
6881           u32 ih_rb_cntl = RREG32(IH_RB_CNTL);
6882           u32 ih_cntl = RREG32(IH_CNTL);
6883 
6884           ih_rb_cntl &= ~IH_RB_ENABLE;
6885           ih_cntl &= ~ENABLE_INTR;
               /* IH_RB_CNTL is written before IH_CNTL here */
6886           WREG32(IH_RB_CNTL, ih_rb_cntl);
6887           WREG32(IH_CNTL, ih_cntl);
6888           /* set rptr, wptr to 0 */
6889           WREG32(IH_RB_RPTR, 0);
6890           WREG32(IH_RB_WPTR, 0);
6891           rdev->ih.enabled = false;
               /* keep the cached read pointer in sync with the register reset above */
6892           rdev->ih.rptr = 0;
6893 }
6894 
6895 /**
6896  * cik_disable_interrupt_state - Disable all interrupt sources
6897  *
6898  * @rdev: radeon_device pointer
6899  *
6900  * Clear all interrupt enable bits used by the driver (CIK).
 * Most control registers are simply written with 0.  Three groups are
 * updated read-modify-write so that selected bits survive:
 * CP_INT_CNTL_RING0 keeps only the CNTX busy/empty enable bits, the
 * SDMA control registers lose only TRAP_ENABLE, and the HPD control
 * registers keep only DC_HPDx_INT_POLARITY.
 * CRTC 2-5 registers are only touched when rdev->num_crtc says those
 * CRTCs exist.
6901  */
cik_disable_interrupt_state(struct radeon_device * rdev)6902 static void cik_disable_interrupt_state(struct radeon_device *rdev)
6903 {
6904           u32 tmp;
6905 
6906           /* gfx ring: keep only the CNTX busy/empty enable bits, clear the rest */
6907           tmp = RREG32(CP_INT_CNTL_RING0) &
6908                     (CNTX_BUSY_INT_ENABLE | CNTX_EMPTY_INT_ENABLE);
6909           WREG32(CP_INT_CNTL_RING0, tmp);
6910           /* sdma: clear TRAP_ENABLE on both instances, other bits untouched */
6911           tmp = RREG32(SDMA0_CNTL + SDMA0_REGISTER_OFFSET) & ~TRAP_ENABLE;
6912           WREG32(SDMA0_CNTL + SDMA0_REGISTER_OFFSET, tmp);
6913           tmp = RREG32(SDMA0_CNTL + SDMA1_REGISTER_OFFSET) & ~TRAP_ENABLE;
6914           WREG32(SDMA0_CNTL + SDMA1_REGISTER_OFFSET, tmp);
6915           /* compute queues: ME1 and ME2, pipes 0-3 */
6916           WREG32(CP_ME1_PIPE0_INT_CNTL, 0);
6917           WREG32(CP_ME1_PIPE1_INT_CNTL, 0);
6918           WREG32(CP_ME1_PIPE2_INT_CNTL, 0);
6919           WREG32(CP_ME1_PIPE3_INT_CNTL, 0);
6920           WREG32(CP_ME2_PIPE0_INT_CNTL, 0);
6921           WREG32(CP_ME2_PIPE1_INT_CNTL, 0);
6922           WREG32(CP_ME2_PIPE2_INT_CNTL, 0);
6923           WREG32(CP_ME2_PIPE3_INT_CNTL, 0);
6924           /* grbm */
6925           WREG32(GRBM_INT_CNTL, 0);
6926           /* SRBM */
6927           WREG32(SRBM_INT_CNTL, 0);
6928           /* vline/vblank, etc. (CRTC 0 and 1 are written unconditionally) */
6929           WREG32(LB_INTERRUPT_MASK + EVERGREEN_CRTC0_REGISTER_OFFSET, 0);
6930           WREG32(LB_INTERRUPT_MASK + EVERGREEN_CRTC1_REGISTER_OFFSET, 0);
6931           if (rdev->num_crtc >= 4) {
6932                     WREG32(LB_INTERRUPT_MASK + EVERGREEN_CRTC2_REGISTER_OFFSET, 0);
6933                     WREG32(LB_INTERRUPT_MASK + EVERGREEN_CRTC3_REGISTER_OFFSET, 0);
6934           }
6935           if (rdev->num_crtc >= 6) {
6936                     WREG32(LB_INTERRUPT_MASK + EVERGREEN_CRTC4_REGISTER_OFFSET, 0);
6937                     WREG32(LB_INTERRUPT_MASK + EVERGREEN_CRTC5_REGISTER_OFFSET, 0);
6938           }
6939           /* pflip */
6940           if (rdev->num_crtc >= 2) {
6941                     WREG32(GRPH_INT_CONTROL + EVERGREEN_CRTC0_REGISTER_OFFSET, 0);
6942                     WREG32(GRPH_INT_CONTROL + EVERGREEN_CRTC1_REGISTER_OFFSET, 0);
6943           }
6944           if (rdev->num_crtc >= 4) {
6945                     WREG32(GRPH_INT_CONTROL + EVERGREEN_CRTC2_REGISTER_OFFSET, 0);
6946                     WREG32(GRPH_INT_CONTROL + EVERGREEN_CRTC3_REGISTER_OFFSET, 0);
6947           }
6948           if (rdev->num_crtc >= 6) {
6949                     WREG32(GRPH_INT_CONTROL + EVERGREEN_CRTC4_REGISTER_OFFSET, 0);
6950                     WREG32(GRPH_INT_CONTROL + EVERGREEN_CRTC5_REGISTER_OFFSET, 0);
6951           }
6952 
6953           /* dac hotplug */
6954           WREG32(DAC_AUTODETECT_INT_CONTROL, 0);
6955 
6956           /* digital hotplug: keep only the polarity bit of each HPD pin */
6957           tmp = RREG32(DC_HPD1_INT_CONTROL) & DC_HPDx_INT_POLARITY;
6958           WREG32(DC_HPD1_INT_CONTROL, tmp);
6959           tmp = RREG32(DC_HPD2_INT_CONTROL) & DC_HPDx_INT_POLARITY;
6960           WREG32(DC_HPD2_INT_CONTROL, tmp);
6961           tmp = RREG32(DC_HPD3_INT_CONTROL) & DC_HPDx_INT_POLARITY;
6962           WREG32(DC_HPD3_INT_CONTROL, tmp);
6963           tmp = RREG32(DC_HPD4_INT_CONTROL) & DC_HPDx_INT_POLARITY;
6964           WREG32(DC_HPD4_INT_CONTROL, tmp);
6965           tmp = RREG32(DC_HPD5_INT_CONTROL) & DC_HPDx_INT_POLARITY;
6966           WREG32(DC_HPD5_INT_CONTROL, tmp);
6967           tmp = RREG32(DC_HPD6_INT_CONTROL) & DC_HPDx_INT_POLARITY;
6968           WREG32(DC_HPD6_INT_CONTROL, tmp);
6969 
6970 }
6971 
6972 /**
6973  * cik_irq_init - init and enable the interrupt ring
6974  *
6975  * @rdev: radeon_device pointer
6976  *
6977  * Allocate a ring buffer for the interrupt controller,
6978  * disable interrupts, resume the RLC, program the IH
6979  * ring buffer registers and enable the ring (CIK).
6980  * Called at device load and resume.
 * If cik_rlc_resume() fails, the IH ring allocated at the start is
 * released again with r600_ih_ring_fini() before returning.
6981  * Returns 0 for success, errors for failure.
6982  */
cik_irq_init(struct radeon_device * rdev)6983 static int cik_irq_init(struct radeon_device *rdev)
6984 {
6985           int ret = 0;
6986           int rb_bufsz;
6987           u32 interrupt_cntl, ih_cntl, ih_rb_cntl;
6988 
6989           /* allocate ring */
6990           ret = r600_ih_ring_alloc(rdev);
6991           if (ret)
6992                     return ret;
6993 
6994           /* disable irqs */
6995           cik_disable_interrupts(rdev);
6996 
6997           /* init rlc; on failure undo the ring allocation above */
6998           ret = cik_rlc_resume(rdev);
6999           if (ret) {
7000                     r600_ih_ring_fini(rdev);
7001                     return ret;
7002           }
7003 
7004           /* setup interrupt control */
7005           /* XXX this should actually be a bus address, not an MC address. same on older asics */
7006           WREG32(INTERRUPT_CNTL2, rdev->ih.gpu_addr >> 8);
7007           interrupt_cntl = RREG32(INTERRUPT_CNTL);
7008           /* IH_DUMMY_RD_OVERRIDE=0 - dummy read disabled with msi, enabled without msi
7009            * IH_DUMMY_RD_OVERRIDE=1 - dummy read controlled by IH_DUMMY_RD_EN
7010            */
7011           interrupt_cntl &= ~IH_DUMMY_RD_OVERRIDE;
7012           /* IH_REQ_NONSNOOP_EN=1 if ring is in non-cacheable memory, e.g., vram */
7013           interrupt_cntl &= ~IH_REQ_NONSNOOP_EN;
7014           WREG32(INTERRUPT_CNTL, interrupt_cntl);
7015 
               /* the ring base is programmed as the GPU address shifted right by 8 */
7016           WREG32(IH_RB_BASE, rdev->ih.gpu_addr >> 8);
               /* size field is derived from the ring size in dwords via order_base_2() */
7017           rb_bufsz = order_base_2(rdev->ih.ring_size / 4);
7018 
7019           ih_rb_cntl = (IH_WPTR_OVERFLOW_ENABLE |
7020                           IH_WPTR_OVERFLOW_CLEAR |
7021                           (rb_bufsz << 1));
7022 
7023           if (rdev->wb.enabled)
7024                     ih_rb_cntl |= IH_WPTR_WRITEBACK_ENABLE;
7025 
7026           /* set the writeback address whether it's enabled or not */
7027           WREG32(IH_RB_WPTR_ADDR_LO, (rdev->wb.gpu_addr + R600_WB_IH_WPTR_OFFSET) & 0xFFFFFFFC);
7028           WREG32(IH_RB_WPTR_ADDR_HI, upper_32_bits(rdev->wb.gpu_addr + R600_WB_IH_WPTR_OFFSET) & 0xFF);
7029 
7030           WREG32(IH_RB_CNTL, ih_rb_cntl);
7031 
7032           /* set rptr, wptr to 0 */
7033           WREG32(IH_RB_RPTR, 0);
7034           WREG32(IH_RB_WPTR, 0);
7035 
7036           /* Default settings for IH_CNTL (disabled at first) */
7037           ih_cntl = MC_WRREQ_CREDIT(0x10) | MC_WR_CLEAN_CNT(0x10) | MC_VMID(0);
7038           /* RPTR_REARM only works if msi's are enabled */
7039           if (rdev->msi_enabled)
7040                     ih_cntl |= RPTR_REARM;
7041           WREG32(IH_CNTL, ih_cntl);
7042 
7043           /* force the active interrupt state to all disabled */
7044           cik_disable_interrupt_state(rdev);
7045 
7046           pci_set_master(rdev->pdev);
7047 
7048           /* enable irqs */
7049           cik_enable_interrupts(rdev);
7050 
               /* ret is 0 here: every failure path above has already returned */
7051           return ret;
7052 }
7053 
7054 /**
7055  * cik_irq_set - enable/disable interrupt sources
7056  *
7057  * @rdev: radeon_device pointer
7058  *
7059  * Enable interrupt sources on the GPU (vblanks, hpd,
7060  * etc.) (CIK).
 * The function first builds the new value of every control register
 * in local variables, starting from the current hardware value with
 * the driver-controlled enable bits cleared, and ORs in the enables
 * requested through rdev->irq.  Only then are the registers written,
 * followed by a posting read of SRBM_STATUS.
 * If no handler is installed it warns and returns -EINVAL.  If the IH
 * ring is disabled it disables all sources and returns 0 without
 * enabling anything.
7061  * Returns 0 for success, errors for failure.
7062  */
cik_irq_set(struct radeon_device * rdev)7063 int cik_irq_set(struct radeon_device *rdev)
7064 {
7065           u32 cp_int_cntl;
7066           u32 cp_m1p0;
7067           u32 crtc1 = 0, crtc2 = 0, crtc3 = 0, crtc4 = 0, crtc5 = 0, crtc6 = 0;
7068           u32 hpd1, hpd2, hpd3, hpd4, hpd5, hpd6;
7069           u32 grbm_int_cntl = 0;
7070           u32 dma_cntl, dma_cntl1;
7071 
7072           if (!rdev->irq.installed) {
7073                     WARN(1, "Can't enable IRQ/MSI because no handler is installed\n");
7074                     return -EINVAL;
7075           }
7076           /* don't enable anything if the ih is disabled */
7077           if (!rdev->ih.enabled) {
7078                     cik_disable_interrupts(rdev);
7079                     /* force the active interrupt state to all disabled */
7080                     cik_disable_interrupt_state(rdev);
7081                     return 0;
7082           }
7083 
               /* gfx ring: keep the CNTX busy/empty enables, always enable priv faults */
7084           cp_int_cntl = RREG32(CP_INT_CNTL_RING0) &
7085                     (CNTX_BUSY_INT_ENABLE | CNTX_EMPTY_INT_ENABLE);
7086           cp_int_cntl |= PRIV_INSTR_INT_ENABLE | PRIV_REG_INT_ENABLE;
7087 
               /* hpd: start from the current value with both enable bits cleared */
7088           hpd1 = RREG32(DC_HPD1_INT_CONTROL) & ~(DC_HPDx_INT_EN | DC_HPDx_RX_INT_EN);
7089           hpd2 = RREG32(DC_HPD2_INT_CONTROL) & ~(DC_HPDx_INT_EN | DC_HPDx_RX_INT_EN);
7090           hpd3 = RREG32(DC_HPD3_INT_CONTROL) & ~(DC_HPDx_INT_EN | DC_HPDx_RX_INT_EN);
7091           hpd4 = RREG32(DC_HPD4_INT_CONTROL) & ~(DC_HPDx_INT_EN | DC_HPDx_RX_INT_EN);
7092           hpd5 = RREG32(DC_HPD5_INT_CONTROL) & ~(DC_HPDx_INT_EN | DC_HPDx_RX_INT_EN);
7093           hpd6 = RREG32(DC_HPD6_INT_CONTROL) & ~(DC_HPDx_INT_EN | DC_HPDx_RX_INT_EN);
7094 
7095           dma_cntl = RREG32(SDMA0_CNTL + SDMA0_REGISTER_OFFSET) & ~TRAP_ENABLE;
7096           dma_cntl1 = RREG32(SDMA0_CNTL + SDMA1_REGISTER_OFFSET) & ~TRAP_ENABLE;
7097 
7098           cp_m1p0 = RREG32(CP_ME1_PIPE0_INT_CNTL) & ~TIME_STAMP_INT_ENABLE;
7099 
7100           /* enable CP interrupts on all rings */
7101           if (atomic_read(&rdev->irq.ring_int[RADEON_RING_TYPE_GFX_INDEX])) {
7102                     DRM_DEBUG("cik_irq_set: sw int gfx\n");
7103                     cp_int_cntl |= TIME_STAMP_INT_ENABLE;
7104           }
               /* compute rings: only ME1 pipe 0 is handled, anything else is just logged */
7105           if (atomic_read(&rdev->irq.ring_int[CAYMAN_RING_TYPE_CP1_INDEX])) {
7106                     struct radeon_ring *ring = &rdev->ring[CAYMAN_RING_TYPE_CP1_INDEX];
7107                     DRM_DEBUG("cik_irq_set: sw int cp1\n");
7108                     if (ring->me == 1) {
7109                               switch (ring->pipe) {
7110                               case 0:
7111                                         cp_m1p0 |= TIME_STAMP_INT_ENABLE;
7112                                         break;
7113                               default:
7114                                         DRM_DEBUG("cik_irq_set: sw int cp1 invalid pipe %d\n", ring->pipe);
7115                                         break;
7116                               }
7117                     } else {
7118                               DRM_DEBUG("cik_irq_set: sw int cp1 invalid me %d\n", ring->me);
7119                     }
7120           }
7121           if (atomic_read(&rdev->irq.ring_int[CAYMAN_RING_TYPE_CP2_INDEX])) {
7122                     struct radeon_ring *ring = &rdev->ring[CAYMAN_RING_TYPE_CP2_INDEX];
7123                     DRM_DEBUG("cik_irq_set: sw int cp2\n");
7124                     if (ring->me == 1) {
7125                               switch (ring->pipe) {
7126                               case 0:
7127                                         cp_m1p0 |= TIME_STAMP_INT_ENABLE;
7128                                         break;
7129                               default:
7130                                         DRM_DEBUG("cik_irq_set: sw int cp2 invalid pipe %d\n", ring->pipe);
7131                                         break;
7132                               }
7133                     } else {
7134                               DRM_DEBUG("cik_irq_set: sw int cp2 invalid me %d\n", ring->me);
7135                     }
7136           }
7137 
7138           if (atomic_read(&rdev->irq.ring_int[R600_RING_TYPE_DMA_INDEX])) {
7139                     DRM_DEBUG("cik_irq_set: sw int dma\n");
7140                     dma_cntl |= TRAP_ENABLE;
7141           }
7142 
7143           if (atomic_read(&rdev->irq.ring_int[CAYMAN_RING_TYPE_DMA1_INDEX])) {
7144                     DRM_DEBUG("cik_irq_set: sw int dma1\n");
7145                     dma_cntl1 |= TRAP_ENABLE;
7146           }
7147 
               /* vblank is enabled when either vblank or a page flip is requested on the crtc */
7148           if (rdev->irq.crtc_vblank_int[0] ||
7149               atomic_read(&rdev->irq.pflip[0])) {
7150                     DRM_DEBUG("cik_irq_set: vblank 0\n");
7151                     crtc1 |= VBLANK_INTERRUPT_MASK;
7152           }
7153           if (rdev->irq.crtc_vblank_int[1] ||
7154               atomic_read(&rdev->irq.pflip[1])) {
7155                     DRM_DEBUG("cik_irq_set: vblank 1\n");
7156                     crtc2 |= VBLANK_INTERRUPT_MASK;
7157           }
7158           if (rdev->irq.crtc_vblank_int[2] ||
7159               atomic_read(&rdev->irq.pflip[2])) {
7160                     DRM_DEBUG("cik_irq_set: vblank 2\n");
7161                     crtc3 |= VBLANK_INTERRUPT_MASK;
7162           }
7163           if (rdev->irq.crtc_vblank_int[3] ||
7164               atomic_read(&rdev->irq.pflip[3])) {
7165                     DRM_DEBUG("cik_irq_set: vblank 3\n");
7166                     crtc4 |= VBLANK_INTERRUPT_MASK;
7167           }
7168           if (rdev->irq.crtc_vblank_int[4] ||
7169               atomic_read(&rdev->irq.pflip[4])) {
7170                     DRM_DEBUG("cik_irq_set: vblank 4\n");
7171                     crtc5 |= VBLANK_INTERRUPT_MASK;
7172           }
7173           if (rdev->irq.crtc_vblank_int[5] ||
7174               atomic_read(&rdev->irq.pflip[5])) {
7175                     DRM_DEBUG("cik_irq_set: vblank 5\n");
7176                     crtc6 |= VBLANK_INTERRUPT_MASK;
7177           }
7178           if (rdev->irq.hpd[0]) {
7179                     DRM_DEBUG("cik_irq_set: hpd 1\n");
7180                     hpd1 |= DC_HPDx_INT_EN | DC_HPDx_RX_INT_EN;
7181           }
7182           if (rdev->irq.hpd[1]) {
7183                     DRM_DEBUG("cik_irq_set: hpd 2\n");
7184                     hpd2 |= DC_HPDx_INT_EN | DC_HPDx_RX_INT_EN;
7185           }
7186           if (rdev->irq.hpd[2]) {
7187                     DRM_DEBUG("cik_irq_set: hpd 3\n");
7188                     hpd3 |= DC_HPDx_INT_EN | DC_HPDx_RX_INT_EN;
7189           }
7190           if (rdev->irq.hpd[3]) {
7191                     DRM_DEBUG("cik_irq_set: hpd 4\n");
7192                     hpd4 |= DC_HPDx_INT_EN | DC_HPDx_RX_INT_EN;
7193           }
7194           if (rdev->irq.hpd[4]) {
7195                     DRM_DEBUG("cik_irq_set: hpd 5\n");
7196                     hpd5 |= DC_HPDx_INT_EN | DC_HPDx_RX_INT_EN;
7197           }
7198           if (rdev->irq.hpd[5]) {
7199                     DRM_DEBUG("cik_irq_set: hpd 6\n");
7200                     hpd6 |= DC_HPDx_INT_EN | DC_HPDx_RX_INT_EN;
7201           }
7202 
               /* all values are computed; now commit them to the hardware */
7203           WREG32(CP_INT_CNTL_RING0, cp_int_cntl);
7204 
7205           WREG32(SDMA0_CNTL + SDMA0_REGISTER_OFFSET, dma_cntl);
7206           WREG32(SDMA0_CNTL + SDMA1_REGISTER_OFFSET, dma_cntl1);
7207 
7208           WREG32(CP_ME1_PIPE0_INT_CNTL, cp_m1p0);
7209 
7210           WREG32(GRBM_INT_CNTL, grbm_int_cntl);
7211 
7212           WREG32(LB_INTERRUPT_MASK + EVERGREEN_CRTC0_REGISTER_OFFSET, crtc1);
7213           WREG32(LB_INTERRUPT_MASK + EVERGREEN_CRTC1_REGISTER_OFFSET, crtc2);
7214           if (rdev->num_crtc >= 4) {
7215                     WREG32(LB_INTERRUPT_MASK + EVERGREEN_CRTC2_REGISTER_OFFSET, crtc3);
7216                     WREG32(LB_INTERRUPT_MASK + EVERGREEN_CRTC3_REGISTER_OFFSET, crtc4);
7217           }
7218           if (rdev->num_crtc >= 6) {
7219                     WREG32(LB_INTERRUPT_MASK + EVERGREEN_CRTC4_REGISTER_OFFSET, crtc5);
7220                     WREG32(LB_INTERRUPT_MASK + EVERGREEN_CRTC5_REGISTER_OFFSET, crtc6);
7221           }
7222 
               /* the pflip mask is written for every existing crtc, regardless of rdev->irq.pflip */
7223           if (rdev->num_crtc >= 2) {
7224                     WREG32(GRPH_INT_CONTROL + EVERGREEN_CRTC0_REGISTER_OFFSET,
7225                            GRPH_PFLIP_INT_MASK);
7226                     WREG32(GRPH_INT_CONTROL + EVERGREEN_CRTC1_REGISTER_OFFSET,
7227                            GRPH_PFLIP_INT_MASK);
7228           }
7229           if (rdev->num_crtc >= 4) {
7230                     WREG32(GRPH_INT_CONTROL + EVERGREEN_CRTC2_REGISTER_OFFSET,
7231                            GRPH_PFLIP_INT_MASK);
7232                     WREG32(GRPH_INT_CONTROL + EVERGREEN_CRTC3_REGISTER_OFFSET,
7233                            GRPH_PFLIP_INT_MASK);
7234           }
7235           if (rdev->num_crtc >= 6) {
7236                     WREG32(GRPH_INT_CONTROL + EVERGREEN_CRTC4_REGISTER_OFFSET,
7237                            GRPH_PFLIP_INT_MASK);
7238                     WREG32(GRPH_INT_CONTROL + EVERGREEN_CRTC5_REGISTER_OFFSET,
7239                            GRPH_PFLIP_INT_MASK);
7240           }
7241 
7242           WREG32(DC_HPD1_INT_CONTROL, hpd1);
7243           WREG32(DC_HPD2_INT_CONTROL, hpd2);
7244           WREG32(DC_HPD3_INT_CONTROL, hpd3);
7245           WREG32(DC_HPD4_INT_CONTROL, hpd4);
7246           WREG32(DC_HPD5_INT_CONTROL, hpd5);
7247           WREG32(DC_HPD6_INT_CONTROL, hpd6);
7248 
7249           /* posting read */
7250           RREG32(SRBM_STATUS);
7251 
7252           return 0;
7253 }
7254 
7255 /**
7256  * cik_irq_ack - ack interrupt sources
7257  *
7258  * @rdev: radeon_device pointer
7259  *
7260  * Ack interrupt sources on the GPU (vblanks, hpd,
7261  * etc.) (CIK).  Certain interrupts sources are sw
7262  * generated and do not require an explicit ack.
 * The display and GRPH interrupt status registers are first latched
 * into rdev->irq.stat_regs.cik; every ack below is decided from those
 * saved values.  Status for CRTC 2-5 is only read and acked when
 * rdev->num_crtc says those CRTCs exist.
7263  */
cik_irq_ack(struct radeon_device * rdev)7264 static inline void cik_irq_ack(struct radeon_device *rdev)
7265 {
7266           u32 tmp;
7267 
               /* latch the display interrupt status registers */
7268           rdev->irq.stat_regs.cik.disp_int = RREG32(DISP_INTERRUPT_STATUS);
7269           rdev->irq.stat_regs.cik.disp_int_cont = RREG32(DISP_INTERRUPT_STATUS_CONTINUE);
7270           rdev->irq.stat_regs.cik.disp_int_cont2 = RREG32(DISP_INTERRUPT_STATUS_CONTINUE2);
7271           rdev->irq.stat_regs.cik.disp_int_cont3 = RREG32(DISP_INTERRUPT_STATUS_CONTINUE3);
7272           rdev->irq.stat_regs.cik.disp_int_cont4 = RREG32(DISP_INTERRUPT_STATUS_CONTINUE4);
7273           rdev->irq.stat_regs.cik.disp_int_cont5 = RREG32(DISP_INTERRUPT_STATUS_CONTINUE5);
7274           rdev->irq.stat_regs.cik.disp_int_cont6 = RREG32(DISP_INTERRUPT_STATUS_CONTINUE6);
7275 
               /* latch the per-crtc GRPH (page flip) status */
7276           rdev->irq.stat_regs.cik.d1grph_int = RREG32(GRPH_INT_STATUS +
7277                     EVERGREEN_CRTC0_REGISTER_OFFSET);
7278           rdev->irq.stat_regs.cik.d2grph_int = RREG32(GRPH_INT_STATUS +
7279                     EVERGREEN_CRTC1_REGISTER_OFFSET);
7280           if (rdev->num_crtc >= 4) {
7281                     rdev->irq.stat_regs.cik.d3grph_int = RREG32(GRPH_INT_STATUS +
7282                               EVERGREEN_CRTC2_REGISTER_OFFSET);
7283                     rdev->irq.stat_regs.cik.d4grph_int = RREG32(GRPH_INT_STATUS +
7284                               EVERGREEN_CRTC3_REGISTER_OFFSET);
7285           }
7286           if (rdev->num_crtc >= 6) {
7287                     rdev->irq.stat_regs.cik.d5grph_int = RREG32(GRPH_INT_STATUS +
7288                               EVERGREEN_CRTC4_REGISTER_OFFSET);
7289                     rdev->irq.stat_regs.cik.d6grph_int = RREG32(GRPH_INT_STATUS +
7290                               EVERGREEN_CRTC5_REGISTER_OFFSET);
7291           }
7292 
               /* crtc 0 and 1: ack page flip, vblank and vline if flagged */
7293           if (rdev->irq.stat_regs.cik.d1grph_int & GRPH_PFLIP_INT_OCCURRED)
7294                     WREG32(GRPH_INT_STATUS + EVERGREEN_CRTC0_REGISTER_OFFSET,
7295                            GRPH_PFLIP_INT_CLEAR);
7296           if (rdev->irq.stat_regs.cik.d2grph_int & GRPH_PFLIP_INT_OCCURRED)
7297                     WREG32(GRPH_INT_STATUS + EVERGREEN_CRTC1_REGISTER_OFFSET,
7298                            GRPH_PFLIP_INT_CLEAR);
7299           if (rdev->irq.stat_regs.cik.disp_int & LB_D1_VBLANK_INTERRUPT)
7300                     WREG32(LB_VBLANK_STATUS + EVERGREEN_CRTC0_REGISTER_OFFSET, VBLANK_ACK);
7301           if (rdev->irq.stat_regs.cik.disp_int & LB_D1_VLINE_INTERRUPT)
7302                     WREG32(LB_VLINE_STATUS + EVERGREEN_CRTC0_REGISTER_OFFSET, VLINE_ACK);
7303           if (rdev->irq.stat_regs.cik.disp_int_cont & LB_D2_VBLANK_INTERRUPT)
7304                     WREG32(LB_VBLANK_STATUS + EVERGREEN_CRTC1_REGISTER_OFFSET, VBLANK_ACK);
7305           if (rdev->irq.stat_regs.cik.disp_int_cont & LB_D2_VLINE_INTERRUPT)
7306                     WREG32(LB_VLINE_STATUS + EVERGREEN_CRTC1_REGISTER_OFFSET, VLINE_ACK);
7307 
               /* crtc 2 and 3 */
7308           if (rdev->num_crtc >= 4) {
7309                     if (rdev->irq.stat_regs.cik.d3grph_int & GRPH_PFLIP_INT_OCCURRED)
7310                               WREG32(GRPH_INT_STATUS + EVERGREEN_CRTC2_REGISTER_OFFSET,
7311                                      GRPH_PFLIP_INT_CLEAR);
7312                     if (rdev->irq.stat_regs.cik.d4grph_int & GRPH_PFLIP_INT_OCCURRED)
7313                               WREG32(GRPH_INT_STATUS + EVERGREEN_CRTC3_REGISTER_OFFSET,
7314                                      GRPH_PFLIP_INT_CLEAR);
7315                     if (rdev->irq.stat_regs.cik.disp_int_cont2 & LB_D3_VBLANK_INTERRUPT)
7316                               WREG32(LB_VBLANK_STATUS + EVERGREEN_CRTC2_REGISTER_OFFSET, VBLANK_ACK);
7317                     if (rdev->irq.stat_regs.cik.disp_int_cont2 & LB_D3_VLINE_INTERRUPT)
7318                               WREG32(LB_VLINE_STATUS + EVERGREEN_CRTC2_REGISTER_OFFSET, VLINE_ACK);
7319                     if (rdev->irq.stat_regs.cik.disp_int_cont3 & LB_D4_VBLANK_INTERRUPT)
7320                               WREG32(LB_VBLANK_STATUS + EVERGREEN_CRTC3_REGISTER_OFFSET, VBLANK_ACK);
7321                     if (rdev->irq.stat_regs.cik.disp_int_cont3 & LB_D4_VLINE_INTERRUPT)
7322                               WREG32(LB_VLINE_STATUS + EVERGREEN_CRTC3_REGISTER_OFFSET, VLINE_ACK);
7323           }
7324 
               /* crtc 4 and 5 */
7325           if (rdev->num_crtc >= 6) {
7326                     if (rdev->irq.stat_regs.cik.d5grph_int & GRPH_PFLIP_INT_OCCURRED)
7327                               WREG32(GRPH_INT_STATUS + EVERGREEN_CRTC4_REGISTER_OFFSET,
7328                                      GRPH_PFLIP_INT_CLEAR);
7329                     if (rdev->irq.stat_regs.cik.d6grph_int & GRPH_PFLIP_INT_OCCURRED)
7330                               WREG32(GRPH_INT_STATUS + EVERGREEN_CRTC5_REGISTER_OFFSET,
7331                                      GRPH_PFLIP_INT_CLEAR);
7332                     if (rdev->irq.stat_regs.cik.disp_int_cont4 & LB_D5_VBLANK_INTERRUPT)
7333                               WREG32(LB_VBLANK_STATUS + EVERGREEN_CRTC4_REGISTER_OFFSET, VBLANK_ACK);
7334                     if (rdev->irq.stat_regs.cik.disp_int_cont4 & LB_D5_VLINE_INTERRUPT)
7335                               WREG32(LB_VLINE_STATUS + EVERGREEN_CRTC4_REGISTER_OFFSET, VLINE_ACK);
7336                     if (rdev->irq.stat_regs.cik.disp_int_cont5 & LB_D6_VBLANK_INTERRUPT)
7337                               WREG32(LB_VBLANK_STATUS + EVERGREEN_CRTC5_REGISTER_OFFSET, VBLANK_ACK);
7338                     if (rdev->irq.stat_regs.cik.disp_int_cont5 & LB_D6_VLINE_INTERRUPT)
7339                               WREG32(LB_VLINE_STATUS + EVERGREEN_CRTC5_REGISTER_OFFSET, VLINE_ACK);
7340           }
7341 
               /* hotplug: set the ack bit with a read-modify-write of each flagged pin */
7342           if (rdev->irq.stat_regs.cik.disp_int & DC_HPD1_INTERRUPT) {
7343                     tmp = RREG32(DC_HPD1_INT_CONTROL);
7344                     tmp |= DC_HPDx_INT_ACK;
7345                     WREG32(DC_HPD1_INT_CONTROL, tmp);
7346           }
7347           if (rdev->irq.stat_regs.cik.disp_int_cont & DC_HPD2_INTERRUPT) {
7348                     tmp = RREG32(DC_HPD2_INT_CONTROL);
7349                     tmp |= DC_HPDx_INT_ACK;
7350                     WREG32(DC_HPD2_INT_CONTROL, tmp);
7351           }
7352           if (rdev->irq.stat_regs.cik.disp_int_cont2 & DC_HPD3_INTERRUPT) {
7353                     tmp = RREG32(DC_HPD3_INT_CONTROL);
7354                     tmp |= DC_HPDx_INT_ACK;
7355                     WREG32(DC_HPD3_INT_CONTROL, tmp);
7356           }
7357           if (rdev->irq.stat_regs.cik.disp_int_cont3 & DC_HPD4_INTERRUPT) {
7358                     tmp = RREG32(DC_HPD4_INT_CONTROL);
7359                     tmp |= DC_HPDx_INT_ACK;
7360                     WREG32(DC_HPD4_INT_CONTROL, tmp);
7361           }
7362           if (rdev->irq.stat_regs.cik.disp_int_cont4 & DC_HPD5_INTERRUPT) {
7363                     tmp = RREG32(DC_HPD5_INT_CONTROL);
7364                     tmp |= DC_HPDx_INT_ACK;
7365                     WREG32(DC_HPD5_INT_CONTROL, tmp);
7366           }
7367           if (rdev->irq.stat_regs.cik.disp_int_cont5 & DC_HPD6_INTERRUPT) {
7368                     tmp = RREG32(DC_HPD6_INT_CONTROL);
7369                     tmp |= DC_HPDx_INT_ACK;
7370                     WREG32(DC_HPD6_INT_CONTROL, tmp);
7371           }
               /* hotplug RX interrupts use a separate ack bit in the same registers */
7372           if (rdev->irq.stat_regs.cik.disp_int & DC_HPD1_RX_INTERRUPT) {
7373                     tmp = RREG32(DC_HPD1_INT_CONTROL);
7374                     tmp |= DC_HPDx_RX_INT_ACK;
7375                     WREG32(DC_HPD1_INT_CONTROL, tmp);
7376           }
7377           if (rdev->irq.stat_regs.cik.disp_int_cont & DC_HPD2_RX_INTERRUPT) {
7378                     tmp = RREG32(DC_HPD2_INT_CONTROL);
7379                     tmp |= DC_HPDx_RX_INT_ACK;
7380                     WREG32(DC_HPD2_INT_CONTROL, tmp);
7381           }
7382           if (rdev->irq.stat_regs.cik.disp_int_cont2 & DC_HPD3_RX_INTERRUPT) {
7383                     tmp = RREG32(DC_HPD3_INT_CONTROL);
7384                     tmp |= DC_HPDx_RX_INT_ACK;
7385                     WREG32(DC_HPD3_INT_CONTROL, tmp);
7386           }
7387           if (rdev->irq.stat_regs.cik.disp_int_cont3 & DC_HPD4_RX_INTERRUPT) {
7388                     tmp = RREG32(DC_HPD4_INT_CONTROL);
7389                     tmp |= DC_HPDx_RX_INT_ACK;
7390                     WREG32(DC_HPD4_INT_CONTROL, tmp);
7391           }
7392           if (rdev->irq.stat_regs.cik.disp_int_cont4 & DC_HPD5_RX_INTERRUPT) {
7393                     tmp = RREG32(DC_HPD5_INT_CONTROL);
7394                     tmp |= DC_HPDx_RX_INT_ACK;
7395                     WREG32(DC_HPD5_INT_CONTROL, tmp);
7396           }
7397           if (rdev->irq.stat_regs.cik.disp_int_cont5 & DC_HPD6_RX_INTERRUPT) {
7398                     tmp = RREG32(DC_HPD6_INT_CONTROL);
7399                     tmp |= DC_HPDx_RX_INT_ACK;
7400                     WREG32(DC_HPD6_INT_CONTROL, tmp);
7401           }
7402 }
7403 
7404 /**
7405  * cik_irq_disable - disable interrupts
7406  *
7407  * @rdev: radeon_device pointer
7408  *
7409  * Disable interrupts on the hw (CIK).
 * Disables the IH ring, waits 1 ms, acks whatever the status
 * registers report, then clears all interrupt source enables.
7410  */
cik_irq_disable(struct radeon_device * rdev)7411 static void cik_irq_disable(struct radeon_device *rdev)
7412 {
7413           cik_disable_interrupts(rdev);
7414           /* Wait and acknowledge irq */
7415           mdelay(1);
7416           cik_irq_ack(rdev);
7417           cik_disable_interrupt_state(rdev);
7418 }
7419 
7420 /**
7421  * cik_irq_suspend - disable interrupts for suspend
7422  *
7423  * @rdev: radeon_device pointer
7424  *
7425  * Disable interrupts and stop the RLC (CIK).
7426  * Used for suspend.
7427  */
cik_irq_suspend(struct radeon_device * rdev)7428 static void cik_irq_suspend(struct radeon_device *rdev)
7429 {
               /* interrupts are disabled before the RLC is stopped */
7430           cik_irq_disable(rdev);
7431           cik_rlc_stop(rdev);
7432 }
7433 
7434 /**
7435  * cik_irq_fini - tear down interrupt support
7436  *
7437  * @rdev: radeon_device pointer
7438  *
7439  * Disable interrupts on the hw and free the IH ring
7440  * buffer (CIK).
 * Goes through cik_irq_suspend(), so the RLC is stopped as well
 * before the ring is released with r600_ih_ring_fini().
7441  * Used for driver unload.
7442  */
cik_irq_fini(struct radeon_device * rdev)7443 static void cik_irq_fini(struct radeon_device *rdev)
7444 {
7445           cik_irq_suspend(rdev);
7446           r600_ih_ring_fini(rdev);
7447 }
7448 
7449 /**
7450  * cik_get_ih_wptr - get the IH ring buffer wptr
7451  *
7452  * @rdev: radeon_device pointer
7453  *
7454  * Get the IH ring buffer wptr from either the register
7455  * or the writeback memory buffer (CIK).  Also check for
7456  * ring buffer overflow and deal with it.
 * On overflow the RB_OVERFLOW flag is stripped from the value,
 * rdev->ih.rptr is moved to (wptr + 16) masked with
 * rdev->ih.ptr_mask, a warning is logged and
 * IH_WPTR_OVERFLOW_CLEAR is set in IH_RB_CNTL.
7457  * Used by cik_irq_process().
7458  * Returns the value of the wptr, masked with rdev->ih.ptr_mask.
7459  */
cik_get_ih_wptr(struct radeon_device * rdev)7460 static inline u32 cik_get_ih_wptr(struct radeon_device *rdev)
7461 {
7462           u32 wptr, tmp;
7463 
               /* prefer the writeback copy when writeback is enabled, else read the register */
7464           if (rdev->wb.enabled)
7465                     wptr = le32_to_cpu(rdev->wb.wb[R600_WB_IH_WPTR_OFFSET/4]);
7466           else
7467                     wptr = RREG32(IH_RB_WPTR);
7468 
7469           if (wptr & RB_OVERFLOW) {
7470                     wptr &= ~RB_OVERFLOW;
7471                     /* When a ring buffer overflow happens, start parsing interrupts
7472                      * from the last not overwritten vector (wptr + 16). Hopefully
7473                      * this should allow us to catch up.
7474                      */
7475                     dev_warn(rdev->dev, "IH ring buffer overflow (0x%08X, 0x%08X, 0x%08X)\n",
7476                                wptr, rdev->ih.rptr, (wptr + 16) & rdev->ih.ptr_mask);
7477                     rdev->ih.rptr = (wptr + 16) & rdev->ih.ptr_mask;
                         /* set the overflow-clear bit with a read-modify-write of IH_RB_CNTL */
7478                     tmp = RREG32(IH_RB_CNTL);
7479                     tmp |= IH_WPTR_OVERFLOW_CLEAR;
7480                     WREG32(IH_RB_CNTL, tmp);
7481           }
7482           return (wptr & rdev->ih.ptr_mask);
7483 }
7484 
7485 /*        CIK IV Ring
7486  * Each IV ring entry is 128 bits:
7487  * [7:0]    - interrupt source id
7488  * [31:8]   - reserved
7489  * [59:32]  - interrupt source data
7490  * [63:60]  - reserved
7491  * [71:64]  - RINGID
7492  *            CP:
7493  *            ME_ID [1:0], PIPE_ID[1:0], QUEUE_ID[2:0]
7494  *            QUEUE_ID - for compute, which of the 8 queues owned by the dispatcher
7495  *                     - for gfx, hw shader state (0=PS...5=LS, 6=CS)
7496  *            ME_ID - 0 = gfx, 1 = first 4 CS pipes, 2 = second 4 CS pipes
7497  *            PIPE_ID - ME0 0=3D
7498  *                    - ME1&2 compute dispatcher (4 pipes each)
7499  *            SDMA:
7500  *            INSTANCE_ID [1:0], QUEUE_ID[1:0]
7501  *            INSTANCE_ID - 0 = sdma0, 1 = sdma1
7502  *            QUEUE_ID - 0 = gfx, 1 = rlc0, 2 = rlc1
7503  * [79:72]  - VMID
7504  * [95:80]  - PASID
7505  * [127:96] - reserved
7506  */
/**
 * cik_irq_process - interrupt handler
 *
 * @rdev: radeon_device pointer
 *
 * Interrupt handler (CIK).  Walk the IH ring,
 * ack interrupts and schedule work to handle
 * interrupt events.
 * Returns irq process return code.
 */
cik_irq_process(struct radeon_device * rdev)7517 irqreturn_t cik_irq_process(struct radeon_device *rdev)
7518 {
7519           struct radeon_ring *cp1_ring = &rdev->ring[CAYMAN_RING_TYPE_CP1_INDEX];
7520           struct radeon_ring *cp2_ring = &rdev->ring[CAYMAN_RING_TYPE_CP2_INDEX];
7521           u32 wptr;
7522           u32 rptr;
7523           u32 src_id, src_data, ring_id;
7524           u8 me_id, pipe_id, queue_id;
7525           u32 ring_index;
7526           bool queue_hotplug = false;
7527           bool queue_dp = false;
7528           bool queue_reset = false;
7529           u32 addr, status, mc_client;
7530           bool queue_thermal = false;
7531 
7532           if (!rdev->ih.enabled || rdev->shutdown)
7533                     return IRQ_NONE;
7534 
7535           wptr = cik_get_ih_wptr(rdev);
7536 
7537 restart_ih:
7538           /* is somebody else already processing irqs? */
7539           if (atomic_xchg(&rdev->ih.lock, 1))
7540                     return IRQ_NONE;
7541 
7542           rptr = rdev->ih.rptr;
7543           DRM_DEBUG_VBLANK("cik_irq_process start: rptr %d, wptr %d\n", rptr, wptr);
7544 
7545           /* Order reading of wptr vs. reading of IH ring data */
7546           rmb();
7547 
7548           /* display interrupts */
7549           cik_irq_ack(rdev);
7550 
7551           while (rptr != wptr) {
7552                     /* wptr/rptr are in bytes! */
7553                     ring_index = rptr / 4;
7554 
7555                     src_id =  le32_to_cpu(rdev->ih.ring[ring_index]) & 0xff;
7556                     src_data = le32_to_cpu(rdev->ih.ring[ring_index + 1]) & 0xfffffff;
7557                     ring_id = le32_to_cpu(rdev->ih.ring[ring_index + 2]) & 0xff;
7558 
7559                     switch (src_id) {
7560                     case 1: /* D1 vblank/vline */
7561                               switch (src_data) {
7562                               case 0: /* D1 vblank */
7563                                         if (!(rdev->irq.stat_regs.cik.disp_int & LB_D1_VBLANK_INTERRUPT))
7564                                                   DRM_DEBUG("IH: IH event w/o asserted irq bit?\n");
7565 
7566                                         if (rdev->irq.crtc_vblank_int[0]) {
7567                                                   drm_handle_vblank(rdev->ddev, 0);
7568                                                   rdev->pm.vblank_sync = true;
7569                                                   wake_up(&rdev->irq.vblank_queue);
7570                                         }
7571                                         if (atomic_read(&rdev->irq.pflip[0]))
7572                                                   radeon_crtc_handle_vblank(rdev, 0);
7573                                         rdev->irq.stat_regs.cik.disp_int &= ~LB_D1_VBLANK_INTERRUPT;
7574                                         DRM_DEBUG_VBLANK("IH: D1 vblank\n");
7575 
7576                                         break;
7577                               case 1: /* D1 vline */
7578                                         if (!(rdev->irq.stat_regs.cik.disp_int & LB_D1_VLINE_INTERRUPT))
7579                                                   DRM_DEBUG("IH: IH event w/o asserted irq bit?\n");
7580 
7581                                         rdev->irq.stat_regs.cik.disp_int &= ~LB_D1_VLINE_INTERRUPT;
7582                                         DRM_DEBUG_VBLANK("IH: D1 vline\n");
7583 
7584                                         break;
7585                               default:
7586                                         DRM_DEBUG("Unhandled interrupt: %d %d\n", src_id, src_data);
7587                                         break;
7588                               }
7589                               break;
7590                     case 2: /* D2 vblank/vline */
7591                               switch (src_data) {
7592                               case 0: /* D2 vblank */
7593                                         if (!(rdev->irq.stat_regs.cik.disp_int_cont & LB_D2_VBLANK_INTERRUPT))
7594                                                   DRM_DEBUG("IH: IH event w/o asserted irq bit?\n");
7595 
7596                                         if (rdev->irq.crtc_vblank_int[1]) {
7597                                                   drm_handle_vblank(rdev->ddev, 1);
7598                                                   rdev->pm.vblank_sync = true;
7599                                                   wake_up(&rdev->irq.vblank_queue);
7600                                         }
7601                                         if (atomic_read(&rdev->irq.pflip[1]))
7602                                                   radeon_crtc_handle_vblank(rdev, 1);
7603                                         rdev->irq.stat_regs.cik.disp_int_cont &= ~LB_D2_VBLANK_INTERRUPT;
7604                                         DRM_DEBUG_VBLANK("IH: D2 vblank\n");
7605 
7606                                         break;
7607                               case 1: /* D2 vline */
7608                                         if (!(rdev->irq.stat_regs.cik.disp_int_cont & LB_D2_VLINE_INTERRUPT))
7609                                                   DRM_DEBUG("IH: IH event w/o asserted irq bit?\n");
7610 
7611                                         rdev->irq.stat_regs.cik.disp_int_cont &= ~LB_D2_VLINE_INTERRUPT;
7612                                         DRM_DEBUG_VBLANK("IH: D2 vline\n");
7613 
7614                                         break;
7615                               default:
7616                                         DRM_DEBUG("Unhandled interrupt: %d %d\n", src_id, src_data);
7617                                         break;
7618                               }
7619                               break;
7620                     case 3: /* D3 vblank/vline */
7621                               switch (src_data) {
7622                               case 0: /* D3 vblank */
7623                                         if (!(rdev->irq.stat_regs.cik.disp_int_cont2 & LB_D3_VBLANK_INTERRUPT))
7624                                                   DRM_DEBUG("IH: IH event w/o asserted irq bit?\n");
7625 
7626                                         if (rdev->irq.crtc_vblank_int[2]) {
7627                                                   drm_handle_vblank(rdev->ddev, 2);
7628                                                   rdev->pm.vblank_sync = true;
7629                                                   wake_up(&rdev->irq.vblank_queue);
7630                                         }
7631                                         if (atomic_read(&rdev->irq.pflip[2]))
7632                                                   radeon_crtc_handle_vblank(rdev, 2);
7633                                         rdev->irq.stat_regs.cik.disp_int_cont2 &= ~LB_D3_VBLANK_INTERRUPT;
7634                                         DRM_DEBUG_VBLANK("IH: D3 vblank\n");
7635 
7636                                         break;
7637                               case 1: /* D3 vline */
7638                                         if (!(rdev->irq.stat_regs.cik.disp_int_cont2 & LB_D3_VLINE_INTERRUPT))
7639                                                   DRM_DEBUG("IH: IH event w/o asserted irq bit?\n");
7640 
7641                                         rdev->irq.stat_regs.cik.disp_int_cont2 &= ~LB_D3_VLINE_INTERRUPT;
7642                                         DRM_DEBUG_VBLANK("IH: D3 vline\n");
7643 
7644                                         break;
7645                               default:
7646                                         DRM_DEBUG("Unhandled interrupt: %d %d\n", src_id, src_data);
7647                                         break;
7648                               }
7649                               break;
7650                     case 4: /* D4 vblank/vline */
7651                               switch (src_data) {
7652                               case 0: /* D4 vblank */
7653                                         if (!(rdev->irq.stat_regs.cik.disp_int_cont3 & LB_D4_VBLANK_INTERRUPT))
7654                                                   DRM_DEBUG("IH: IH event w/o asserted irq bit?\n");
7655 
7656                                         if (rdev->irq.crtc_vblank_int[3]) {
7657                                                   drm_handle_vblank(rdev->ddev, 3);
7658                                                   rdev->pm.vblank_sync = true;
7659                                                   wake_up(&rdev->irq.vblank_queue);
7660                                         }
7661                                         if (atomic_read(&rdev->irq.pflip[3]))
7662                                                   radeon_crtc_handle_vblank(rdev, 3);
7663                                         rdev->irq.stat_regs.cik.disp_int_cont3 &= ~LB_D4_VBLANK_INTERRUPT;
7664                                         DRM_DEBUG_VBLANK("IH: D4 vblank\n");
7665 
7666                                         break;
7667                               case 1: /* D4 vline */
7668                                         if (!(rdev->irq.stat_regs.cik.disp_int_cont3 & LB_D4_VLINE_INTERRUPT))
7669                                                   DRM_DEBUG("IH: IH event w/o asserted irq bit?\n");
7670 
7671                                         rdev->irq.stat_regs.cik.disp_int_cont3 &= ~LB_D4_VLINE_INTERRUPT;
7672                                         DRM_DEBUG_VBLANK("IH: D4 vline\n");
7673 
7674                                         break;
7675                               default:
7676                                         DRM_DEBUG("Unhandled interrupt: %d %d\n", src_id, src_data);
7677                                         break;
7678                               }
7679                               break;
7680                     case 5: /* D5 vblank/vline */
7681                               switch (src_data) {
7682                               case 0: /* D5 vblank */
7683                                         if (!(rdev->irq.stat_regs.cik.disp_int_cont4 & LB_D5_VBLANK_INTERRUPT))
7684                                                   DRM_DEBUG("IH: IH event w/o asserted irq bit?\n");
7685 
7686                                         if (rdev->irq.crtc_vblank_int[4]) {
7687                                                   drm_handle_vblank(rdev->ddev, 4);
7688                                                   rdev->pm.vblank_sync = true;
7689                                                   wake_up(&rdev->irq.vblank_queue);
7690                                         }
7691                                         if (atomic_read(&rdev->irq.pflip[4]))
7692                                                   radeon_crtc_handle_vblank(rdev, 4);
7693                                         rdev->irq.stat_regs.cik.disp_int_cont4 &= ~LB_D5_VBLANK_INTERRUPT;
7694                                         DRM_DEBUG_VBLANK("IH: D5 vblank\n");
7695 
7696                                         break;
7697                               case 1: /* D5 vline */
7698                                         if (!(rdev->irq.stat_regs.cik.disp_int_cont4 & LB_D5_VLINE_INTERRUPT))
7699                                                   DRM_DEBUG("IH: IH event w/o asserted irq bit?\n");
7700 
7701                                         rdev->irq.stat_regs.cik.disp_int_cont4 &= ~LB_D5_VLINE_INTERRUPT;
7702                                         DRM_DEBUG("IH: D5 vline\n");
7703 
7704                                         break;
7705                               default:
7706                                         DRM_DEBUG("Unhandled interrupt: %d %d\n", src_id, src_data);
7707                                         break;
7708                               }
7709                               break;
7710                     case 6: /* D6 vblank/vline */
7711                               switch (src_data) {
7712                               case 0: /* D6 vblank */
7713                                         if (!(rdev->irq.stat_regs.cik.disp_int_cont5 & LB_D6_VBLANK_INTERRUPT))
7714                                                   DRM_DEBUG("IH: IH event w/o asserted irq bit?\n");
7715 
7716                                         if (rdev->irq.crtc_vblank_int[5]) {
7717                                                   drm_handle_vblank(rdev->ddev, 5);
7718                                                   rdev->pm.vblank_sync = true;
7719                                                   wake_up(&rdev->irq.vblank_queue);
7720                                         }
7721                                         if (atomic_read(&rdev->irq.pflip[5]))
7722                                                   radeon_crtc_handle_vblank(rdev, 5);
7723                                         rdev->irq.stat_regs.cik.disp_int_cont5 &= ~LB_D6_VBLANK_INTERRUPT;
7724                                         DRM_DEBUG_VBLANK("IH: D6 vblank\n");
7725 
7726                                         break;
7727                               case 1: /* D6 vline */
7728                                         if (!(rdev->irq.stat_regs.cik.disp_int_cont5 & LB_D6_VLINE_INTERRUPT))
7729                                                   DRM_DEBUG("IH: IH event w/o asserted irq bit?\n");
7730 
7731                                         rdev->irq.stat_regs.cik.disp_int_cont5 &= ~LB_D6_VLINE_INTERRUPT;
7732                                         DRM_DEBUG_VBLANK("IH: D6 vline\n");
7733 
7734                                         break;
7735                               default:
7736                                         DRM_DEBUG("Unhandled interrupt: %d %d\n", src_id, src_data);
7737                                         break;
7738                               }
7739                               break;
7740                     case 8: /* D1 page flip */
7741                     case 10: /* D2 page flip */
7742                     case 12: /* D3 page flip */
7743                     case 14: /* D4 page flip */
7744                     case 16: /* D5 page flip */
7745                     case 18: /* D6 page flip */
7746                               DRM_DEBUG("IH: D%d flip\n", ((src_id - 8) >> 1) + 1);
7747                               if (radeon_use_pflipirq > 0)
7748                                         radeon_crtc_handle_flip(rdev, (src_id - 8) >> 1);
7749                               break;
7750                     case 42: /* HPD hotplug */
7751                               switch (src_data) {
7752                               case 0:
7753                                         if (!(rdev->irq.stat_regs.cik.disp_int & DC_HPD1_INTERRUPT))
7754                                                   DRM_DEBUG("IH: IH event w/o asserted irq bit?\n");
7755 
7756                                         rdev->irq.stat_regs.cik.disp_int &= ~DC_HPD1_INTERRUPT;
7757                                         queue_hotplug = true;
7758                                         DRM_DEBUG("IH: HPD1\n");
7759 
7760                                         break;
7761                               case 1:
7762                                         if (!(rdev->irq.stat_regs.cik.disp_int_cont & DC_HPD2_INTERRUPT))
7763                                                   DRM_DEBUG("IH: IH event w/o asserted irq bit?\n");
7764 
7765                                         rdev->irq.stat_regs.cik.disp_int_cont &= ~DC_HPD2_INTERRUPT;
7766                                         queue_hotplug = true;
7767                                         DRM_DEBUG("IH: HPD2\n");
7768 
7769                                         break;
7770                               case 2:
7771                                         if (!(rdev->irq.stat_regs.cik.disp_int_cont2 & DC_HPD3_INTERRUPT))
7772                                                   DRM_DEBUG("IH: IH event w/o asserted irq bit?\n");
7773 
7774                                         rdev->irq.stat_regs.cik.disp_int_cont2 &= ~DC_HPD3_INTERRUPT;
7775                                         queue_hotplug = true;
7776                                         DRM_DEBUG("IH: HPD3\n");
7777 
7778                                         break;
7779                               case 3:
7780                                         if (!(rdev->irq.stat_regs.cik.disp_int_cont3 & DC_HPD4_INTERRUPT))
7781                                                   DRM_DEBUG("IH: IH event w/o asserted irq bit?\n");
7782 
7783                                         rdev->irq.stat_regs.cik.disp_int_cont3 &= ~DC_HPD4_INTERRUPT;
7784                                         queue_hotplug = true;
7785                                         DRM_DEBUG("IH: HPD4\n");
7786 
7787                                         break;
7788                               case 4:
7789                                         if (!(rdev->irq.stat_regs.cik.disp_int_cont4 & DC_HPD5_INTERRUPT))
7790                                                   DRM_DEBUG("IH: IH event w/o asserted irq bit?\n");
7791 
7792                                         rdev->irq.stat_regs.cik.disp_int_cont4 &= ~DC_HPD5_INTERRUPT;
7793                                         queue_hotplug = true;
7794                                         DRM_DEBUG("IH: HPD5\n");
7795 
7796                                         break;
7797                               case 5:
7798                                         if (!(rdev->irq.stat_regs.cik.disp_int_cont5 & DC_HPD6_INTERRUPT))
7799                                                   DRM_DEBUG("IH: IH event w/o asserted irq bit?\n");
7800 
7801                                         rdev->irq.stat_regs.cik.disp_int_cont5 &= ~DC_HPD6_INTERRUPT;
7802                                         queue_hotplug = true;
7803                                         DRM_DEBUG("IH: HPD6\n");
7804 
7805                                         break;
7806                               case 6:
7807                                         if (!(rdev->irq.stat_regs.cik.disp_int & DC_HPD1_RX_INTERRUPT))
7808                                                   DRM_DEBUG("IH: IH event w/o asserted irq bit?\n");
7809 
7810                                         rdev->irq.stat_regs.cik.disp_int &= ~DC_HPD1_RX_INTERRUPT;
7811                                         queue_dp = true;
7812                                         DRM_DEBUG("IH: HPD_RX 1\n");
7813 
7814                                         break;
7815                               case 7:
7816                                         if (!(rdev->irq.stat_regs.cik.disp_int_cont & DC_HPD2_RX_INTERRUPT))
7817                                                   DRM_DEBUG("IH: IH event w/o asserted irq bit?\n");
7818 
7819                                         rdev->irq.stat_regs.cik.disp_int_cont &= ~DC_HPD2_RX_INTERRUPT;
7820                                         queue_dp = true;
7821                                         DRM_DEBUG("IH: HPD_RX 2\n");
7822 
7823                                         break;
7824                               case 8:
7825                                         if (!(rdev->irq.stat_regs.cik.disp_int_cont2 & DC_HPD3_RX_INTERRUPT))
7826                                                   DRM_DEBUG("IH: IH event w/o asserted irq bit?\n");
7827 
7828                                         rdev->irq.stat_regs.cik.disp_int_cont2 &= ~DC_HPD3_RX_INTERRUPT;
7829                                         queue_dp = true;
7830                                         DRM_DEBUG("IH: HPD_RX 3\n");
7831 
7832                                         break;
7833                               case 9:
7834                                         if (!(rdev->irq.stat_regs.cik.disp_int_cont3 & DC_HPD4_RX_INTERRUPT))
7835                                                   DRM_DEBUG("IH: IH event w/o asserted irq bit?\n");
7836 
7837                                         rdev->irq.stat_regs.cik.disp_int_cont3 &= ~DC_HPD4_RX_INTERRUPT;
7838                                         queue_dp = true;
7839                                         DRM_DEBUG("IH: HPD_RX 4\n");
7840 
7841                                         break;
7842                               case 10:
7843                                         if (!(rdev->irq.stat_regs.cik.disp_int_cont4 & DC_HPD5_RX_INTERRUPT))
7844                                                   DRM_DEBUG("IH: IH event w/o asserted irq bit?\n");
7845 
7846                                         rdev->irq.stat_regs.cik.disp_int_cont4 &= ~DC_HPD5_RX_INTERRUPT;
7847                                         queue_dp = true;
7848                                         DRM_DEBUG("IH: HPD_RX 5\n");
7849 
7850                                         break;
7851                               case 11:
7852                                         if (!(rdev->irq.stat_regs.cik.disp_int_cont5 & DC_HPD6_RX_INTERRUPT))
7853                                                   DRM_DEBUG("IH: IH event w/o asserted irq bit?\n");
7854 
7855                                         rdev->irq.stat_regs.cik.disp_int_cont5 &= ~DC_HPD6_RX_INTERRUPT;
7856                                         queue_dp = true;
7857                                         DRM_DEBUG("IH: HPD_RX 6\n");
7858 
7859                                         break;
7860                               default:
7861                                         DRM_DEBUG("Unhandled interrupt: %d %d\n", src_id, src_data);
7862                                         break;
7863                               }
7864                               break;
7865                     case 96:
7866                               DRM_ERROR("SRBM_READ_ERROR: 0x%x\n", RREG32(SRBM_READ_ERROR));
7867                               WREG32(SRBM_INT_ACK, 0x1);
7868                               break;
7869                     case 124: /* UVD */
7870                               DRM_DEBUG("IH: UVD int: 0x%08x\n", src_data);
7871                               radeon_fence_process(rdev, R600_RING_TYPE_UVD_INDEX);
7872                               break;
7873                     case 146:
7874                     case 147:
7875                               addr = RREG32(VM_CONTEXT1_PROTECTION_FAULT_ADDR);
7876                               status = RREG32(VM_CONTEXT1_PROTECTION_FAULT_STATUS);
7877                               mc_client = RREG32(VM_CONTEXT1_PROTECTION_FAULT_MCCLIENT);
7878                               /* reset addr and status */
7879                               WREG32_P(VM_CONTEXT1_CNTL2, 1, ~1);
7880                               if (addr == 0x0 && status == 0x0)
7881                                         break;
7882                               dev_err(rdev->dev, "GPU fault detected: %d 0x%08x\n", src_id, src_data);
7883                               dev_err(rdev->dev, "  VM_CONTEXT1_PROTECTION_FAULT_ADDR   0x%08X\n",
7884                                         addr);
7885                               dev_err(rdev->dev, "  VM_CONTEXT1_PROTECTION_FAULT_STATUS 0x%08X\n",
7886                                         status);
7887                               cik_vm_decode_fault(rdev, status, addr, mc_client);
7888                               break;
7889                     case 167: /* VCE */
7890                               DRM_DEBUG("IH: VCE int: 0x%08x\n", src_data);
7891                               switch (src_data) {
7892                               case 0:
7893                                         radeon_fence_process(rdev, TN_RING_TYPE_VCE1_INDEX);
7894                                         break;
7895                               case 1:
7896                                         radeon_fence_process(rdev, TN_RING_TYPE_VCE2_INDEX);
7897                                         break;
7898                               default:
7899                                         DRM_ERROR("Unhandled interrupt: %d %d\n", src_id, src_data);
7900                                         break;
7901                               }
7902                               break;
7903                     case 176: /* GFX RB CP_INT */
7904                     case 177: /* GFX IB CP_INT */
7905                               radeon_fence_process(rdev, RADEON_RING_TYPE_GFX_INDEX);
7906                               break;
7907                     case 181: /* CP EOP event */
7908                               DRM_DEBUG("IH: CP EOP\n");
7909                               /* XXX check the bitfield order! */
7910                               me_id = (ring_id & 0x60) >> 5;
7911                               pipe_id = (ring_id & 0x18) >> 3;
7912                               queue_id = (ring_id & 0x7) >> 0;
7913                               switch (me_id) {
7914                               case 0:
7915                                         radeon_fence_process(rdev, RADEON_RING_TYPE_GFX_INDEX);
7916                                         break;
7917                               case 1:
7918                               case 2:
7919                                         if ((cp1_ring->me == me_id) & (cp1_ring->pipe == pipe_id))
7920                                                   radeon_fence_process(rdev, CAYMAN_RING_TYPE_CP1_INDEX);
7921                                         if ((cp2_ring->me == me_id) & (cp2_ring->pipe == pipe_id))
7922                                                   radeon_fence_process(rdev, CAYMAN_RING_TYPE_CP2_INDEX);
7923                                         break;
7924                               }
7925                               break;
7926                     case 184: /* CP Privileged reg access */
7927                               DRM_ERROR("Illegal register access in command stream\n");
7928                               /* XXX check the bitfield order! */
7929                               me_id = (ring_id & 0x60) >> 5;
7930                               pipe_id = (ring_id & 0x18) >> 3;
7931                               queue_id = (ring_id & 0x7) >> 0;
7932                               switch (me_id) {
7933                               case 0:
7934                                         /* This results in a full GPU reset, but all we need to do is soft
7935                                          * reset the CP for gfx
7936                                          */
7937                                         queue_reset = true;
7938                                         break;
7939                               case 1:
7940                                         /* XXX compute */
7941                                         queue_reset = true;
7942                                         break;
7943                               case 2:
7944                                         /* XXX compute */
7945                                         queue_reset = true;
7946                                         break;
7947                               }
7948                               break;
7949                     case 185: /* CP Privileged inst */
7950                               DRM_ERROR("Illegal instruction in command stream\n");
7951                               /* XXX check the bitfield order! */
7952                               me_id = (ring_id & 0x60) >> 5;
7953                               pipe_id = (ring_id & 0x18) >> 3;
7954                               queue_id = (ring_id & 0x7) >> 0;
7955                               switch (me_id) {
7956                               case 0:
7957                                         /* This results in a full GPU reset, but all we need to do is soft
7958                                          * reset the CP for gfx
7959                                          */
7960                                         queue_reset = true;
7961                                         break;
7962                               case 1:
7963                                         /* XXX compute */
7964                                         queue_reset = true;
7965                                         break;
7966                               case 2:
7967                                         /* XXX compute */
7968                                         queue_reset = true;
7969                                         break;
7970                               }
7971                               break;
7972                     case 224: /* SDMA trap event */
7973                               /* XXX check the bitfield order! */
7974                               me_id = (ring_id & 0x3) >> 0;
7975                               queue_id = (ring_id & 0xc) >> 2;
7976                               DRM_DEBUG("IH: SDMA trap\n");
7977                               switch (me_id) {
7978                               case 0:
7979                                         switch (queue_id) {
7980                                         case 0:
7981                                                   radeon_fence_process(rdev, R600_RING_TYPE_DMA_INDEX);
7982                                                   break;
7983                                         case 1:
7984                                                   /* XXX compute */
7985                                                   break;
7986                                         case 2:
7987                                                   /* XXX compute */
7988                                                   break;
7989                                         }
7990                                         break;
7991                               case 1:
7992                                         switch (queue_id) {
7993                                         case 0:
7994                                                   radeon_fence_process(rdev, CAYMAN_RING_TYPE_DMA1_INDEX);
7995                                                   break;
7996                                         case 1:
7997                                                   /* XXX compute */
7998                                                   break;
7999                                         case 2:
8000                                                   /* XXX compute */
8001                                                   break;
8002                                         }
8003                                         break;
8004                               }
8005                               break;
8006                     case 230: /* thermal low to high */
8007                               DRM_DEBUG("IH: thermal low to high\n");
8008                               rdev->pm.dpm.thermal.high_to_low = false;
8009                               queue_thermal = true;
8010                               break;
8011                     case 231: /* thermal high to low */
8012                               DRM_DEBUG("IH: thermal high to low\n");
8013                               rdev->pm.dpm.thermal.high_to_low = true;
8014                               queue_thermal = true;
8015                               break;
8016                     case 233: /* GUI IDLE */
8017                               DRM_DEBUG("IH: GUI idle\n");
8018                               break;
8019                     case 241: /* SDMA Privileged inst */
8020                     case 247: /* SDMA Privileged inst */
8021                               DRM_ERROR("Illegal instruction in SDMA command stream\n");
8022                               /* XXX check the bitfield order! */
8023                               me_id = (ring_id & 0x3) >> 0;
8024                               queue_id = (ring_id & 0xc) >> 2;
8025                               switch (me_id) {
8026                               case 0:
8027                                         switch (queue_id) {
8028                                         case 0:
8029                                                   queue_reset = true;
8030                                                   break;
8031                                         case 1:
8032                                                   /* XXX compute */
8033                                                   queue_reset = true;
8034                                                   break;
8035                                         case 2:
8036                                                   /* XXX compute */
8037                                                   queue_reset = true;
8038                                                   break;
8039                                         }
8040                                         break;
8041                               case 1:
8042                                         switch (queue_id) {
8043                                         case 0:
8044                                                   queue_reset = true;
8045                                                   break;
8046                                         case 1:
8047                                                   /* XXX compute */
8048                                                   queue_reset = true;
8049                                                   break;
8050                                         case 2:
8051                                                   /* XXX compute */
8052                                                   queue_reset = true;
8053                                                   break;
8054                                         }
8055                                         break;
8056                               }
8057                               break;
8058                     default:
8059                               DRM_DEBUG("Unhandled interrupt: %d %d\n", src_id, src_data);
8060                               break;
8061                     }
8062 
8063                     /* wptr/rptr are in bytes! */
8064                     rptr += 16;
8065                     rptr &= rdev->ih.ptr_mask;
8066                     WREG32(IH_RB_RPTR, rptr);
8067           }
8068           if (queue_dp)
8069                     schedule_work(&rdev->dp_work);
8070           if (queue_hotplug)
8071                     schedule_delayed_work(&rdev->hotplug_work, 0);
8072           if (queue_reset) {
8073                     rdev->needs_reset = true;
8074                     wake_up_all(&rdev->fence_queue);
8075           }
8076           if (queue_thermal)
8077                     schedule_work(&rdev->pm.dpm.thermal.work);
8078           rdev->ih.rptr = rptr;
8079           atomic_set(&rdev->ih.lock, 0);
8080 
8081           /* make sure wptr hasn't changed while processing */
8082           wptr = cik_get_ih_wptr(rdev);
8083           if (wptr != rptr)
8084                     goto restart_ih;
8085 
8086           return IRQ_HANDLED;
8087 }
8088 
8089 /*
8090  * startup/shutdown callbacks
8091  */
cik_uvd_init(struct radeon_device * rdev)8092 static void cik_uvd_init(struct radeon_device *rdev)
8093 {
8094           int r;
8095 
8096           if (!rdev->has_uvd)
8097                     return;
8098 
8099           r = radeon_uvd_init(rdev);
8100           if (r) {
8101                     dev_err(rdev->dev, "failed UVD (%d) init.\n", r);
8102                     /*
8103                      * At this point rdev->uvd.vcpu_bo is NULL which trickles down
8104                      * to early fails cik_uvd_start() and thus nothing happens
8105                      * there. So it is pointless to try to go through that code
8106                      * hence why we disable uvd here.
8107                      */
8108                     rdev->has_uvd = 0;
8109                     return;
8110           }
8111           rdev->ring[R600_RING_TYPE_UVD_INDEX].ring_obj = NULL;
8112           r600_ring_init(rdev, &rdev->ring[R600_RING_TYPE_UVD_INDEX], 4096);
8113 }
8114 
cik_uvd_start(struct radeon_device * rdev)8115 static void cik_uvd_start(struct radeon_device *rdev)
8116 {
8117           int r;
8118 
8119           if (!rdev->has_uvd)
8120                     return;
8121 
8122           r = radeon_uvd_resume(rdev);
8123           if (r) {
8124                     dev_err(rdev->dev, "failed UVD resume (%d).\n", r);
8125                     goto error;
8126           }
8127           r = uvd_v4_2_resume(rdev);
8128           if (r) {
8129                     dev_err(rdev->dev, "failed UVD 4.2 resume (%d).\n", r);
8130                     goto error;
8131           }
8132           r = radeon_fence_driver_start_ring(rdev, R600_RING_TYPE_UVD_INDEX);
8133           if (r) {
8134                     dev_err(rdev->dev, "failed initializing UVD fences (%d).\n", r);
8135                     goto error;
8136           }
8137           return;
8138 
8139 error:
8140           rdev->ring[R600_RING_TYPE_UVD_INDEX].ring_size = 0;
8141 }
8142 
cik_uvd_resume(struct radeon_device * rdev)8143 static void cik_uvd_resume(struct radeon_device *rdev)
8144 {
8145           struct radeon_ring *ring;
8146           int r;
8147 
8148           if (!rdev->has_uvd || !rdev->ring[R600_RING_TYPE_UVD_INDEX].ring_size)
8149                     return;
8150 
8151           ring = &rdev->ring[R600_RING_TYPE_UVD_INDEX];
8152           r = radeon_ring_init(rdev, ring, ring->ring_size, 0, PACKET0(UVD_NO_OP, 0));
8153           if (r) {
8154                     dev_err(rdev->dev, "failed initializing UVD ring (%d).\n", r);
8155                     return;
8156           }
8157           r = uvd_v1_0_init(rdev);
8158           if (r) {
8159                     dev_err(rdev->dev, "failed initializing UVD (%d).\n", r);
8160                     return;
8161           }
8162 }
8163 
cik_vce_init(struct radeon_device * rdev)8164 static void cik_vce_init(struct radeon_device *rdev)
8165 {
8166           int r;
8167 
8168           if (!rdev->has_vce)
8169                     return;
8170 
8171           r = radeon_vce_init(rdev);
8172           if (r) {
8173                     dev_err(rdev->dev, "failed VCE (%d) init.\n", r);
8174                     /*
8175                      * At this point rdev->vce.vcpu_bo is NULL which trickles down
8176                      * to early fails cik_vce_start() and thus nothing happens
8177                      * there. So it is pointless to try to go through that code
8178                      * hence why we disable vce here.
8179                      */
8180                     rdev->has_vce = 0;
8181                     return;
8182           }
8183           rdev->ring[TN_RING_TYPE_VCE1_INDEX].ring_obj = NULL;
8184           r600_ring_init(rdev, &rdev->ring[TN_RING_TYPE_VCE1_INDEX], 4096);
8185           rdev->ring[TN_RING_TYPE_VCE2_INDEX].ring_obj = NULL;
8186           r600_ring_init(rdev, &rdev->ring[TN_RING_TYPE_VCE2_INDEX], 4096);
8187 }
8188 
cik_vce_start(struct radeon_device * rdev)8189 static void cik_vce_start(struct radeon_device *rdev)
8190 {
8191           int r;
8192 
8193           if (!rdev->has_vce)
8194                     return;
8195 
8196           r = radeon_vce_resume(rdev);
8197           if (r) {
8198                     dev_err(rdev->dev, "failed VCE resume (%d).\n", r);
8199                     goto error;
8200           }
8201           r = vce_v2_0_resume(rdev);
8202           if (r) {
8203                     dev_err(rdev->dev, "failed VCE resume (%d).\n", r);
8204                     goto error;
8205           }
8206           r = radeon_fence_driver_start_ring(rdev, TN_RING_TYPE_VCE1_INDEX);
8207           if (r) {
8208                     dev_err(rdev->dev, "failed initializing VCE1 fences (%d).\n", r);
8209                     goto error;
8210           }
8211           r = radeon_fence_driver_start_ring(rdev, TN_RING_TYPE_VCE2_INDEX);
8212           if (r) {
8213                     dev_err(rdev->dev, "failed initializing VCE2 fences (%d).\n", r);
8214                     goto error;
8215           }
8216           return;
8217 
8218 error:
8219           rdev->ring[TN_RING_TYPE_VCE1_INDEX].ring_size = 0;
8220           rdev->ring[TN_RING_TYPE_VCE2_INDEX].ring_size = 0;
8221 }
8222 
cik_vce_resume(struct radeon_device * rdev)8223 static void cik_vce_resume(struct radeon_device *rdev)
8224 {
8225           struct radeon_ring *ring;
8226           int r;
8227 
8228           if (!rdev->has_vce || !rdev->ring[TN_RING_TYPE_VCE1_INDEX].ring_size)
8229                     return;
8230 
8231           ring = &rdev->ring[TN_RING_TYPE_VCE1_INDEX];
8232           r = radeon_ring_init(rdev, ring, ring->ring_size, 0, VCE_CMD_NO_OP);
8233           if (r) {
8234                     dev_err(rdev->dev, "failed initializing VCE1 ring (%d).\n", r);
8235                     return;
8236           }
8237           ring = &rdev->ring[TN_RING_TYPE_VCE2_INDEX];
8238           r = radeon_ring_init(rdev, ring, ring->ring_size, 0, VCE_CMD_NO_OP);
8239           if (r) {
8240                     dev_err(rdev->dev, "failed initializing VCE1 ring (%d).\n", r);
8241                     return;
8242           }
8243           r = vce_v1_0_init(rdev);
8244           if (r) {
8245                     dev_err(rdev->dev, "failed initializing VCE (%d).\n", r);
8246                     return;
8247           }
8248 }
8249 
8250 /**
8251  * cik_startup - program the asic to a functional state
8252  *
8253  * @rdev: radeon_device pointer
8254  *
8255  * Programs the asic to a functional state (CIK).
8256  * Called by cik_init() and cik_resume().
8257  * Returns 0 for success, error for failure.
8258  */
cik_startup(struct radeon_device * rdev)8259 static int cik_startup(struct radeon_device *rdev)
8260 {
8261           struct radeon_ring *ring;
8262           u32 nop;
8263           int r;
8264 
8265           /* enable pcie gen2/3 link */
8266           cik_pcie_gen3_enable(rdev);
8267           /* enable aspm */
8268           cik_program_aspm(rdev);
8269 
8270           /* scratch needs to be initialized before MC */
8271           r = r600_vram_scratch_init(rdev);
8272           if (r)
8273                     return r;
8274 
8275           cik_mc_program(rdev);
8276 
8277           if (!(rdev->flags & RADEON_IS_IGP) && !rdev->pm.dpm_enabled) {
8278                     r = ci_mc_load_microcode(rdev);
8279                     if (r) {
8280                               DRM_ERROR("Failed to load MC firmware!\n");
8281                               return r;
8282                     }
8283           }
8284 
8285           r = cik_pcie_gart_enable(rdev);
8286           if (r)
8287                     return r;
8288           cik_gpu_init(rdev);
8289 
8290           /* allocate rlc buffers */
8291           if (rdev->flags & RADEON_IS_IGP) {
8292                     if (rdev->family == CHIP_KAVERI) {
8293                               rdev->rlc.reg_list = spectre_rlc_save_restore_register_list;
8294                               rdev->rlc.reg_list_size =
8295                                         (u32)ARRAY_SIZE(spectre_rlc_save_restore_register_list);
8296                     } else {
8297                               rdev->rlc.reg_list = kalindi_rlc_save_restore_register_list;
8298                               rdev->rlc.reg_list_size =
8299                                         (u32)ARRAY_SIZE(kalindi_rlc_save_restore_register_list);
8300                     }
8301           }
8302           rdev->rlc.cs_data = ci_cs_data;
8303           rdev->rlc.cp_table_size = ALIGN(CP_ME_TABLE_SIZE * 5 * 4, 2048); /* CP JT */
8304           rdev->rlc.cp_table_size += 64 * 1024; /* GDS */
8305           r = sumo_rlc_init(rdev);
8306           if (r) {
8307                     DRM_ERROR("Failed to init rlc BOs!\n");
8308                     return r;
8309           }
8310 
8311           /* allocate wb buffer */
8312           r = radeon_wb_init(rdev);
8313           if (r)
8314                     return r;
8315 
8316           /* allocate mec buffers */
8317           r = cik_mec_init(rdev);
8318           if (r) {
8319                     DRM_ERROR("Failed to init MEC BOs!\n");
8320                     return r;
8321           }
8322 
8323           r = radeon_fence_driver_start_ring(rdev, RADEON_RING_TYPE_GFX_INDEX);
8324           if (r) {
8325                     dev_err(rdev->dev, "failed initializing CP fences (%d).\n", r);
8326                     return r;
8327           }
8328 
8329           r = radeon_fence_driver_start_ring(rdev, CAYMAN_RING_TYPE_CP1_INDEX);
8330           if (r) {
8331                     dev_err(rdev->dev, "failed initializing CP fences (%d).\n", r);
8332                     return r;
8333           }
8334 
8335           r = radeon_fence_driver_start_ring(rdev, CAYMAN_RING_TYPE_CP2_INDEX);
8336           if (r) {
8337                     dev_err(rdev->dev, "failed initializing CP fences (%d).\n", r);
8338                     return r;
8339           }
8340 
8341           r = radeon_fence_driver_start_ring(rdev, R600_RING_TYPE_DMA_INDEX);
8342           if (r) {
8343                     dev_err(rdev->dev, "failed initializing DMA fences (%d).\n", r);
8344                     return r;
8345           }
8346 
8347           r = radeon_fence_driver_start_ring(rdev, CAYMAN_RING_TYPE_DMA1_INDEX);
8348           if (r) {
8349                     dev_err(rdev->dev, "failed initializing DMA fences (%d).\n", r);
8350                     return r;
8351           }
8352 
8353           cik_uvd_start(rdev);
8354           cik_vce_start(rdev);
8355 
8356           /* Enable IRQ */
8357           if (!rdev->irq.installed) {
8358                     r = radeon_irq_kms_init(rdev);
8359                     if (r)
8360                               return r;
8361           }
8362 
8363           r = cik_irq_init(rdev);
8364           if (r) {
8365                     DRM_ERROR("radeon: IH init failed (%d).\n", r);
8366                     radeon_irq_kms_fini(rdev);
8367                     return r;
8368           }
8369           cik_irq_set(rdev);
8370 
8371           if (rdev->family == CHIP_HAWAII) {
8372                     if (rdev->new_fw)
8373                               nop = PACKET3(PACKET3_NOP, 0x3FFF);
8374                     else
8375                               nop = RADEON_CP_PACKET2;
8376           } else {
8377                     nop = PACKET3(PACKET3_NOP, 0x3FFF);
8378           }
8379 
8380           ring = &rdev->ring[RADEON_RING_TYPE_GFX_INDEX];
8381           r = radeon_ring_init(rdev, ring, ring->ring_size, RADEON_WB_CP_RPTR_OFFSET,
8382                                    nop);
8383           if (r)
8384                     return r;
8385 
8386           /* set up the compute queues */
8387           /* type-2 packets are deprecated on MEC, use type-3 instead */
8388           ring = &rdev->ring[CAYMAN_RING_TYPE_CP1_INDEX];
8389           r = radeon_ring_init(rdev, ring, ring->ring_size, RADEON_WB_CP1_RPTR_OFFSET,
8390                                    nop);
8391           if (r)
8392                     return r;
8393           ring->me = 1; /* first MEC */
8394           ring->pipe = 0; /* first pipe */
8395           ring->queue = 0; /* first queue */
8396           ring->wptr_offs = CIK_WB_CP1_WPTR_OFFSET;
8397 
8398           /* type-2 packets are deprecated on MEC, use type-3 instead */
8399           ring = &rdev->ring[CAYMAN_RING_TYPE_CP2_INDEX];
8400           r = radeon_ring_init(rdev, ring, ring->ring_size, RADEON_WB_CP2_RPTR_OFFSET,
8401                                    nop);
8402           if (r)
8403                     return r;
8404           /* dGPU only have 1 MEC */
8405           ring->me = 1; /* first MEC */
8406           ring->pipe = 0; /* first pipe */
8407           ring->queue = 1; /* second queue */
8408           ring->wptr_offs = CIK_WB_CP2_WPTR_OFFSET;
8409 
8410           ring = &rdev->ring[R600_RING_TYPE_DMA_INDEX];
8411           r = radeon_ring_init(rdev, ring, ring->ring_size, R600_WB_DMA_RPTR_OFFSET,
8412                                    SDMA_PACKET(SDMA_OPCODE_NOP, 0, 0));
8413           if (r)
8414                     return r;
8415 
8416           ring = &rdev->ring[CAYMAN_RING_TYPE_DMA1_INDEX];
8417           r = radeon_ring_init(rdev, ring, ring->ring_size, CAYMAN_WB_DMA1_RPTR_OFFSET,
8418                                    SDMA_PACKET(SDMA_OPCODE_NOP, 0, 0));
8419           if (r)
8420                     return r;
8421 
8422           r = cik_cp_resume(rdev);
8423           if (r)
8424                     return r;
8425 
8426           r = cik_sdma_resume(rdev);
8427           if (r)
8428                     return r;
8429 
8430           cik_uvd_resume(rdev);
8431           cik_vce_resume(rdev);
8432 
8433           r = radeon_ib_pool_init(rdev);
8434           if (r) {
8435                     dev_err(rdev->dev, "IB initialization failed (%d).\n", r);
8436                     return r;
8437           }
8438 
8439           r = radeon_vm_manager_init(rdev);
8440           if (r) {
8441                     dev_err(rdev->dev, "vm manager initialization failed (%d).\n", r);
8442                     return r;
8443           }
8444 
8445           r = radeon_audio_init(rdev);
8446           if (r)
8447                     return r;
8448 
8449           return 0;
8450 }
8451 
8452 /**
8453  * cik_resume - resume the asic to a functional state
8454  *
8455  * @rdev: radeon_device pointer
8456  *
8457  * Programs the asic to a functional state (CIK).
8458  * Called at resume.
8459  * Returns 0 for success, error for failure.
8460  */
cik_resume(struct radeon_device * rdev)8461 int cik_resume(struct radeon_device *rdev)
8462 {
8463           int r;
8464 
8465           /* post card */
8466           atom_asic_init(rdev->mode_info.atom_context);
8467 
8468           /* init golden registers */
8469           cik_init_golden_registers(rdev);
8470 
8471           if (rdev->pm.pm_method == PM_METHOD_DPM)
8472                     radeon_pm_resume(rdev);
8473 
8474           rdev->accel_working = true;
8475           r = cik_startup(rdev);
8476           if (r) {
8477                     DRM_ERROR("cik startup failed on resume\n");
8478                     rdev->accel_working = false;
8479                     return r;
8480           }
8481 
8482           return r;
8483 
8484 }
8485 
8486 /**
8487  * cik_suspend - suspend the asic
8488  *
8489  * @rdev: radeon_device pointer
8490  *
8491  * Bring the chip into a state suitable for suspend (CIK).
8492  * Called at suspend.
8493  * Returns 0 for success.
8494  */
cik_suspend(struct radeon_device * rdev)8495 int cik_suspend(struct radeon_device *rdev)
8496 {
8497           radeon_pm_suspend(rdev);
8498           radeon_audio_fini(rdev);
8499           radeon_vm_manager_fini(rdev);
8500           cik_cp_enable(rdev, false);
8501           cik_sdma_enable(rdev, false);
8502           if (rdev->has_uvd) {
8503                     uvd_v1_0_fini(rdev);
8504                     radeon_uvd_suspend(rdev);
8505           }
8506           if (rdev->has_vce)
8507                     radeon_vce_suspend(rdev);
8508           cik_fini_pg(rdev);
8509           cik_fini_cg(rdev);
8510           cik_irq_suspend(rdev);
8511           radeon_wb_disable(rdev);
8512           cik_pcie_gart_disable(rdev);
8513           return 0;
8514 }
8515 
8516 /* Plan is to move initialization in that function and use
8517  * helper function so that radeon_device_init pretty much
8518  * do nothing more than calling asic specific function. This
8519  * should also allow to remove a bunch of callback function
8520  * like vram_info.
8521  */
8522 /**
8523  * cik_init - asic specific driver and hw init
8524  *
8525  * @rdev: radeon_device pointer
8526  *
8527  * Setup asic specific driver variables and program the hw
8528  * to a functional state (CIK).
8529  * Called at driver startup.
8530  * Returns 0 for success, errors for failure.
8531  */
cik_init(struct radeon_device * rdev)8532 int cik_init(struct radeon_device *rdev)
8533 {
8534           struct radeon_ring *ring;
8535           int r;
8536 
8537           /* Read BIOS */
8538           if (!radeon_get_bios(rdev)) {
8539                     if (ASIC_IS_AVIVO(rdev))
8540                               return -EINVAL;
8541           }
8542           /* Must be an ATOMBIOS */
8543           if (!rdev->is_atom_bios) {
8544                     dev_err(rdev->dev, "Expecting atombios for cayman GPU\n");
8545                     return -EINVAL;
8546           }
8547           r = radeon_atombios_init(rdev);
8548           if (r)
8549                     return r;
8550 
8551           /* Post card if necessary */
8552           if (!radeon_card_posted(rdev)) {
8553                     if (!rdev->bios) {
8554                               dev_err(rdev->dev, "Card not posted and no BIOS - ignoring\n");
8555                               return -EINVAL;
8556                     }
8557                     DRM_INFO("GPU not posted. posting now...\n");
8558                     atom_asic_init(rdev->mode_info.atom_context);
8559           }
8560           /* init golden registers */
8561           cik_init_golden_registers(rdev);
8562           /* Initialize scratch registers */
8563           cik_scratch_init(rdev);
8564           /* Initialize surface registers */
8565           radeon_surface_init(rdev);
8566           /* Initialize clocks */
8567           radeon_get_clock_info(rdev->ddev);
8568 
8569           /* Fence driver */
8570           r = radeon_fence_driver_init(rdev);
8571           if (r)
8572                     return r;
8573 
8574           /* initialize memory controller */
8575           r = cik_mc_init(rdev);
8576           if (r)
8577                     return r;
8578           /* Memory manager */
8579           r = radeon_bo_init(rdev);
8580           if (r)
8581                     return r;
8582 
8583           if (rdev->flags & RADEON_IS_IGP) {
8584                     if (!rdev->me_fw || !rdev->pfp_fw || !rdev->ce_fw ||
8585                         !rdev->mec_fw || !rdev->sdma_fw || !rdev->rlc_fw) {
8586                               r = cik_init_microcode(rdev);
8587                               if (r) {
8588                                         DRM_ERROR("Failed to load firmware!\n");
8589                                         return r;
8590                               }
8591                     }
8592           } else {
8593                     if (!rdev->me_fw || !rdev->pfp_fw || !rdev->ce_fw ||
8594                         !rdev->mec_fw || !rdev->sdma_fw || !rdev->rlc_fw ||
8595                         !rdev->mc_fw) {
8596                               r = cik_init_microcode(rdev);
8597                               if (r) {
8598                                         DRM_ERROR("Failed to load firmware!\n");
8599                                         return r;
8600                               }
8601                     }
8602           }
8603 
8604           /* Initialize power management */
8605           radeon_pm_init(rdev);
8606 
8607           ring = &rdev->ring[RADEON_RING_TYPE_GFX_INDEX];
8608           ring->ring_obj = NULL;
8609           r600_ring_init(rdev, ring, 1024 * 1024);
8610 
8611           ring = &rdev->ring[CAYMAN_RING_TYPE_CP1_INDEX];
8612           ring->ring_obj = NULL;
8613           r600_ring_init(rdev, ring, 1024 * 1024);
8614           r = radeon_doorbell_get(rdev, &ring->doorbell_index);
8615           if (r)
8616                     return r;
8617 
8618           ring = &rdev->ring[CAYMAN_RING_TYPE_CP2_INDEX];
8619           ring->ring_obj = NULL;
8620           r600_ring_init(rdev, ring, 1024 * 1024);
8621           r = radeon_doorbell_get(rdev, &ring->doorbell_index);
8622           if (r)
8623                     return r;
8624 
8625           ring = &rdev->ring[R600_RING_TYPE_DMA_INDEX];
8626           ring->ring_obj = NULL;
8627           r600_ring_init(rdev, ring, 256 * 1024);
8628 
8629           ring = &rdev->ring[CAYMAN_RING_TYPE_DMA1_INDEX];
8630           ring->ring_obj = NULL;
8631           r600_ring_init(rdev, ring, 256 * 1024);
8632 
8633           cik_uvd_init(rdev);
8634           cik_vce_init(rdev);
8635 
8636           rdev->ih.ring_obj = NULL;
8637           r600_ih_ring_init(rdev, 64 * 1024);
8638 
8639           r = r600_pcie_gart_init(rdev);
8640           if (r)
8641                     return r;
8642 
8643           rdev->accel_working = true;
8644           r = cik_startup(rdev);
8645           if (r) {
8646                     dev_err(rdev->dev, "disabling GPU acceleration\n");
8647                     cik_cp_fini(rdev);
8648                     cik_sdma_fini(rdev);
8649                     cik_irq_fini(rdev);
8650                     sumo_rlc_fini(rdev);
8651                     cik_mec_fini(rdev);
8652                     radeon_wb_fini(rdev);
8653                     radeon_ib_pool_fini(rdev);
8654                     radeon_vm_manager_fini(rdev);
8655                     radeon_irq_kms_fini(rdev);
8656                     cik_pcie_gart_fini(rdev);
8657                     rdev->accel_working = false;
8658           }
8659 
8660           /* Don't start up if the MC ucode is missing.
8661            * The default clocks and voltages before the MC ucode
8662            * is loaded are not suffient for advanced operations.
8663            */
8664           if (!rdev->mc_fw && !(rdev->flags & RADEON_IS_IGP)) {
8665                     DRM_ERROR("radeon: MC ucode required for NI+.\n");
8666                     return -EINVAL;
8667           }
8668 
8669           return 0;
8670 }
8671 
8672 /**
8673  * cik_fini - asic specific driver and hw fini
8674  *
8675  * @rdev: radeon_device pointer
8676  *
8677  * Tear down the asic specific driver variables and program the hw
8678  * to an idle state (CIK).
8679  * Called at driver unload.
8680  */
cik_fini(struct radeon_device * rdev)8681 void cik_fini(struct radeon_device *rdev)
8682 {
8683           radeon_pm_fini(rdev);
8684           cik_cp_fini(rdev);
8685           cik_sdma_fini(rdev);
8686           cik_fini_pg(rdev);
8687           cik_fini_cg(rdev);
8688           cik_irq_fini(rdev);
8689           sumo_rlc_fini(rdev);
8690           cik_mec_fini(rdev);
8691           radeon_wb_fini(rdev);
8692           radeon_vm_manager_fini(rdev);
8693           radeon_ib_pool_fini(rdev);
8694           radeon_irq_kms_fini(rdev);
8695           uvd_v1_0_fini(rdev);
8696           radeon_uvd_fini(rdev);
8697           radeon_vce_fini(rdev);
8698           cik_pcie_gart_fini(rdev);
8699           r600_vram_scratch_fini(rdev);
8700           radeon_gem_fini(rdev);
8701           radeon_fence_driver_fini(rdev);
8702           radeon_bo_fini(rdev);
8703           radeon_atombios_fini(rdev);
8704           cik_fini_microcode(rdev);
8705           kfree(rdev->bios);
8706           rdev->bios = NULL;
8707 }
8708 
dce8_program_fmt(struct drm_encoder * encoder)8709 void dce8_program_fmt(struct drm_encoder *encoder)
8710 {
8711           struct drm_device *dev = encoder->dev;
8712           struct radeon_device *rdev = dev->dev_private;
8713           struct radeon_encoder *radeon_encoder = to_radeon_encoder(encoder);
8714           struct radeon_crtc *radeon_crtc = to_radeon_crtc(encoder->crtc);
8715           struct drm_connector *connector = radeon_get_connector_for_encoder(encoder);
8716           int bpc = 0;
8717           u32 tmp = 0;
8718           enum radeon_connector_dither dither = RADEON_FMT_DITHER_DISABLE;
8719 
8720           if (connector) {
8721                     struct radeon_connector *radeon_connector = to_radeon_connector(connector);
8722                     bpc = radeon_get_monitor_bpc(connector);
8723                     dither = radeon_connector->dither;
8724           }
8725 
8726           /* LVDS/eDP FMT is set up by atom */
8727           if (radeon_encoder->devices & ATOM_DEVICE_LCD_SUPPORT)
8728                     return;
8729 
8730           /* not needed for analog */
8731           if ((radeon_encoder->encoder_id == ENCODER_OBJECT_ID_INTERNAL_KLDSCP_DAC1) ||
8732               (radeon_encoder->encoder_id == ENCODER_OBJECT_ID_INTERNAL_KLDSCP_DAC2))
8733                     return;
8734 
8735           if (bpc == 0)
8736                     return;
8737 
8738           switch (bpc) {
8739           case 6:
8740                     if (dither == RADEON_FMT_DITHER_ENABLE)
8741                               /* XXX sort out optimal dither settings */
8742                               tmp |= (FMT_FRAME_RANDOM_ENABLE | FMT_HIGHPASS_RANDOM_ENABLE |
8743                                         FMT_SPATIAL_DITHER_EN | FMT_SPATIAL_DITHER_DEPTH(0));
8744                     else
8745                               tmp |= (FMT_TRUNCATE_EN | FMT_TRUNCATE_DEPTH(0));
8746                     break;
8747           case 8:
8748                     if (dither == RADEON_FMT_DITHER_ENABLE)
8749                               /* XXX sort out optimal dither settings */
8750                               tmp |= (FMT_FRAME_RANDOM_ENABLE | FMT_HIGHPASS_RANDOM_ENABLE |
8751                                         FMT_RGB_RANDOM_ENABLE |
8752                                         FMT_SPATIAL_DITHER_EN | FMT_SPATIAL_DITHER_DEPTH(1));
8753                     else
8754                               tmp |= (FMT_TRUNCATE_EN | FMT_TRUNCATE_DEPTH(1));
8755                     break;
8756           case 10:
8757                     if (dither == RADEON_FMT_DITHER_ENABLE)
8758                               /* XXX sort out optimal dither settings */
8759                               tmp |= (FMT_FRAME_RANDOM_ENABLE | FMT_HIGHPASS_RANDOM_ENABLE |
8760                                         FMT_RGB_RANDOM_ENABLE |
8761                                         FMT_SPATIAL_DITHER_EN | FMT_SPATIAL_DITHER_DEPTH(2));
8762                     else
8763                               tmp |= (FMT_TRUNCATE_EN | FMT_TRUNCATE_DEPTH(2));
8764                     break;
8765           default:
8766                     /* not needed */
8767                     break;
8768           }
8769 
8770           WREG32(FMT_BIT_DEPTH_CONTROL + radeon_crtc->crtc_offset, tmp);
8771 }
8772 
8773 /* display watermark setup */
8774 /**
8775  * dce8_line_buffer_adjust - Set up the line buffer
8776  *
8777  * @rdev: radeon_device pointer
8778  * @radeon_crtc: the selected display controller
8779  * @mode: the current display mode on the selected display
8780  * controller
8781  *
8782  * Set up the line buffer allocation for
8783  * the selected display controller (CIK).
8784  * Returns the line buffer size in pixels.
8785  */
dce8_line_buffer_adjust(struct radeon_device * rdev,struct radeon_crtc * radeon_crtc,struct drm_display_mode * mode)8786 static u32 dce8_line_buffer_adjust(struct radeon_device *rdev,
8787                                            struct radeon_crtc *radeon_crtc,
8788                                            struct drm_display_mode *mode)
8789 {
8790           u32 tmp, buffer_alloc, i;
8791           u32 pipe_offset = radeon_crtc->crtc_id * 0x20;
8792           /*
8793            * Line Buffer Setup
8794            * There are 6 line buffers, one for each display controllers.
8795            * There are 3 partitions per LB. Select the number of partitions
8796            * to enable based on the display width.  For display widths larger
8797            * than 4096, you need use to use 2 display controllers and combine
8798            * them using the stereo blender.
8799            */
8800           if (radeon_crtc->base.enabled && mode) {
8801                     if (mode->crtc_hdisplay < 1920) {
8802                               tmp = 1;
8803                               buffer_alloc = 2;
8804                     } else if (mode->crtc_hdisplay < 2560) {
8805                               tmp = 2;
8806                               buffer_alloc = 2;
8807                     } else if (mode->crtc_hdisplay < 4096) {
8808                               tmp = 0;
8809                               buffer_alloc = (rdev->flags & RADEON_IS_IGP) ? 2 : 4;
8810                     } else {
8811                               DRM_DEBUG_KMS("Mode too big for LB!\n");
8812                               tmp = 0;
8813                               buffer_alloc = (rdev->flags & RADEON_IS_IGP) ? 2 : 4;
8814                     }
8815           } else {
8816                     tmp = 1;
8817                     buffer_alloc = 0;
8818           }
8819 
8820           WREG32(LB_MEMORY_CTRL + radeon_crtc->crtc_offset,
8821                  LB_MEMORY_CONFIG(tmp) | LB_MEMORY_SIZE(0x6B0));
8822 
8823           WREG32(PIPE0_DMIF_BUFFER_CONTROL + pipe_offset,
8824                  DMIF_BUFFERS_ALLOCATED(buffer_alloc));
8825           for (i = 0; i < rdev->usec_timeout; i++) {
8826                     if (RREG32(PIPE0_DMIF_BUFFER_CONTROL + pipe_offset) &
8827                         DMIF_BUFFERS_ALLOCATED_COMPLETED)
8828                               break;
8829                     udelay(1);
8830           }
8831 
8832           if (radeon_crtc->base.enabled && mode) {
8833                     switch (tmp) {
8834                     case 0:
8835                     default:
8836                               return 4096 * 2;
8837                     case 1:
8838                               return 1920 * 2;
8839                     case 2:
8840                               return 2560 * 2;
8841                     }
8842           }
8843 
8844           /* controller not enabled, so no lb used */
8845           return 0;
8846 }
8847 
8848 /**
8849  * cik_get_number_of_dram_channels - get the number of dram channels
8850  *
8851  * @rdev: radeon_device pointer
8852  *
8853  * Look up the number of video ram channels (CIK).
8854  * Used for display watermark bandwidth calculations
8855  * Returns the number of dram channels
8856  */
cik_get_number_of_dram_channels(struct radeon_device * rdev)8857 static u32 cik_get_number_of_dram_channels(struct radeon_device *rdev)
8858 {
8859           u32 tmp = RREG32(MC_SHARED_CHMAP);
8860 
8861           switch ((tmp & NOOFCHAN_MASK) >> NOOFCHAN_SHIFT) {
8862           case 0:
8863           default:
8864                     return 1;
8865           case 1:
8866                     return 2;
8867           case 2:
8868                     return 4;
8869           case 3:
8870                     return 8;
8871           case 4:
8872                     return 3;
8873           case 5:
8874                     return 6;
8875           case 6:
8876                     return 10;
8877           case 7:
8878                     return 12;
8879           case 8:
8880                     return 16;
8881           }
8882 }
8883 
8884 struct dce8_wm_params {
8885           u32 dram_channels; /* number of dram channels */
8886           u32 yclk;          /* bandwidth per dram data pin in kHz */
8887           u32 sclk;          /* engine clock in kHz */
8888           u32 disp_clk;      /* display clock in kHz */
8889           u32 src_width;     /* viewport width */
8890           u32 active_time;   /* active display time in ns */
8891           u32 blank_time;    /* blank time in ns */
8892           bool interlaced;    /* mode is interlaced */
8893           fixed20_12 vsc;    /* vertical scale ratio */
8894           u32 num_heads;     /* number of active crtcs */
8895           u32 bytes_per_pixel; /* bytes per pixel display + overlay */
8896           u32 lb_size;       /* line buffer allocated to pipe */
8897           u32 vtaps;         /* vertical scaler taps */
8898 };
8899 
8900 /**
8901  * dce8_dram_bandwidth - get the dram bandwidth
8902  *
8903  * @wm: watermark calculation data
8904  *
8905  * Calculate the raw dram bandwidth (CIK).
8906  * Used for display watermark bandwidth calculations
8907  * Returns the dram bandwidth in MBytes/s
8908  */
dce8_dram_bandwidth(struct dce8_wm_params * wm)8909 static u32 dce8_dram_bandwidth(struct dce8_wm_params *wm)
8910 {
8911           /* Calculate raw DRAM Bandwidth */
8912           fixed20_12 dram_efficiency; /* 0.7 */
8913           fixed20_12 yclk, dram_channels, bandwidth;
8914           fixed20_12 a;
8915 
8916           a.full = dfixed_const(1000);
8917           yclk.full = dfixed_const(wm->yclk);
8918           yclk.full = dfixed_div(yclk, a);
8919           dram_channels.full = dfixed_const(wm->dram_channels * 4);
8920           a.full = dfixed_const(10);
8921           dram_efficiency.full = dfixed_const(7);
8922           dram_efficiency.full = dfixed_div(dram_efficiency, a);
8923           bandwidth.full = dfixed_mul(dram_channels, yclk);
8924           bandwidth.full = dfixed_mul(bandwidth, dram_efficiency);
8925 
8926           return dfixed_trunc(bandwidth);
8927 }
8928 
8929 /**
8930  * dce8_dram_bandwidth_for_display - get the dram bandwidth for display
8931  *
8932  * @wm: watermark calculation data
8933  *
8934  * Calculate the dram bandwidth used for display (CIK).
8935  * Used for display watermark bandwidth calculations
8936  * Returns the dram bandwidth for display in MBytes/s
8937  */
dce8_dram_bandwidth_for_display(struct dce8_wm_params * wm)8938 static u32 dce8_dram_bandwidth_for_display(struct dce8_wm_params *wm)
8939 {
8940           /* Calculate DRAM Bandwidth and the part allocated to display. */
8941           fixed20_12 disp_dram_allocation; /* 0.3 to 0.7 */
8942           fixed20_12 yclk, dram_channels, bandwidth;
8943           fixed20_12 a;
8944 
8945           a.full = dfixed_const(1000);
8946           yclk.full = dfixed_const(wm->yclk);
8947           yclk.full = dfixed_div(yclk, a);
8948           dram_channels.full = dfixed_const(wm->dram_channels * 4);
8949           a.full = dfixed_const(10);
8950           disp_dram_allocation.full = dfixed_const(3); /* XXX worse case value 0.3 */
8951           disp_dram_allocation.full = dfixed_div(disp_dram_allocation, a);
8952           bandwidth.full = dfixed_mul(dram_channels, yclk);
8953           bandwidth.full = dfixed_mul(bandwidth, disp_dram_allocation);
8954 
8955           return dfixed_trunc(bandwidth);
8956 }
8957 
8958 /**
8959  * dce8_data_return_bandwidth - get the data return bandwidth
8960  *
8961  * @wm: watermark calculation data
8962  *
8963  * Calculate the data return bandwidth used for display (CIK).
8964  * Used for display watermark bandwidth calculations
8965  * Returns the data return bandwidth in MBytes/s
8966  */
dce8_data_return_bandwidth(struct dce8_wm_params * wm)8967 static u32 dce8_data_return_bandwidth(struct dce8_wm_params *wm)
8968 {
8969           /* Calculate the display Data return Bandwidth */
8970           fixed20_12 return_efficiency; /* 0.8 */
8971           fixed20_12 sclk, bandwidth;
8972           fixed20_12 a;
8973 
8974           a.full = dfixed_const(1000);
8975           sclk.full = dfixed_const(wm->sclk);
8976           sclk.full = dfixed_div(sclk, a);
8977           a.full = dfixed_const(10);
8978           return_efficiency.full = dfixed_const(8);
8979           return_efficiency.full = dfixed_div(return_efficiency, a);
8980           a.full = dfixed_const(32);
8981           bandwidth.full = dfixed_mul(a, sclk);
8982           bandwidth.full = dfixed_mul(bandwidth, return_efficiency);
8983 
8984           return dfixed_trunc(bandwidth);
8985 }
8986 
8987 /**
8988  * dce8_dmif_request_bandwidth - get the dmif bandwidth
8989  *
8990  * @wm: watermark calculation data
8991  *
8992  * Calculate the dmif bandwidth used for display (CIK).
8993  * Used for display watermark bandwidth calculations
8994  * Returns the dmif bandwidth in MBytes/s
8995  */
dce8_dmif_request_bandwidth(struct dce8_wm_params * wm)8996 static u32 dce8_dmif_request_bandwidth(struct dce8_wm_params *wm)
8997 {
8998           /* Calculate the DMIF Request Bandwidth */
8999           fixed20_12 disp_clk_request_efficiency; /* 0.8 */
9000           fixed20_12 disp_clk, bandwidth;
9001           fixed20_12 a, b;
9002 
9003           a.full = dfixed_const(1000);
9004           disp_clk.full = dfixed_const(wm->disp_clk);
9005           disp_clk.full = dfixed_div(disp_clk, a);
9006           a.full = dfixed_const(32);
9007           b.full = dfixed_mul(a, disp_clk);
9008 
9009           a.full = dfixed_const(10);
9010           disp_clk_request_efficiency.full = dfixed_const(8);
9011           disp_clk_request_efficiency.full = dfixed_div(disp_clk_request_efficiency, a);
9012 
9013           bandwidth.full = dfixed_mul(b, disp_clk_request_efficiency);
9014 
9015           return dfixed_trunc(bandwidth);
9016 }
9017 
9018 /**
9019  * dce8_available_bandwidth - get the min available bandwidth
9020  *
9021  * @wm: watermark calculation data
9022  *
9023  * Calculate the min available bandwidth used for display (CIK).
9024  * Used for display watermark bandwidth calculations
9025  * Returns the min available bandwidth in MBytes/s
9026  */
dce8_available_bandwidth(struct dce8_wm_params * wm)9027 static u32 dce8_available_bandwidth(struct dce8_wm_params *wm)
9028 {
9029           /* Calculate the Available bandwidth. Display can use this temporarily but not in average. */
9030           u32 dram_bandwidth = dce8_dram_bandwidth(wm);
9031           u32 data_return_bandwidth = dce8_data_return_bandwidth(wm);
9032           u32 dmif_req_bandwidth = dce8_dmif_request_bandwidth(wm);
9033 
9034           return min(dram_bandwidth, min(data_return_bandwidth, dmif_req_bandwidth));
9035 }
9036 
9037 /**
9038  * dce8_average_bandwidth - get the average available bandwidth
9039  *
9040  * @wm: watermark calculation data
9041  *
9042  * Calculate the average available bandwidth used for display (CIK).
9043  * Used for display watermark bandwidth calculations
9044  * Returns the average available bandwidth in MBytes/s
9045  */
dce8_average_bandwidth(struct dce8_wm_params * wm)9046 static u32 dce8_average_bandwidth(struct dce8_wm_params *wm)
9047 {
9048           /* Calculate the display mode Average Bandwidth
9049            * DisplayMode should contain the source and destination dimensions,
9050            * timing, etc.
9051            */
9052           fixed20_12 bpp;
9053           fixed20_12 line_time;
9054           fixed20_12 src_width;
9055           fixed20_12 bandwidth;
9056           fixed20_12 a;
9057 
9058           a.full = dfixed_const(1000);
9059           line_time.full = dfixed_const(wm->active_time + wm->blank_time);
9060           line_time.full = dfixed_div(line_time, a);
9061           bpp.full = dfixed_const(wm->bytes_per_pixel);
9062           src_width.full = dfixed_const(wm->src_width);
9063           bandwidth.full = dfixed_mul(src_width, bpp);
9064           bandwidth.full = dfixed_mul(bandwidth, wm->vsc);
9065           bandwidth.full = dfixed_div(bandwidth, line_time);
9066 
9067           return dfixed_trunc(bandwidth);
9068 }
9069 
9070 /**
9071  * dce8_latency_watermark - get the latency watermark
9072  *
9073  * @wm: watermark calculation data
9074  *
9075  * Calculate the latency watermark (CIK).
9076  * Used for display watermark bandwidth calculations
9077  * Returns the latency watermark in ns
9078  */
dce8_latency_watermark(struct dce8_wm_params * wm)9079 static u32 dce8_latency_watermark(struct dce8_wm_params *wm)
9080 {
9081           /* First calculate the latency in ns */
9082           u32 mc_latency = 2000; /* 2000 ns. */
9083           u32 available_bandwidth = dce8_available_bandwidth(wm);
9084           u32 worst_chunk_return_time = (512 * 8 * 1000) / available_bandwidth;
9085           u32 cursor_line_pair_return_time = (128 * 4 * 1000) / available_bandwidth;
9086           u32 dc_latency = 40000000 / wm->disp_clk; /* dc pipe latency */
9087           u32 other_heads_data_return_time = ((wm->num_heads + 1) * worst_chunk_return_time) +
9088                     (wm->num_heads * cursor_line_pair_return_time);
9089           u32 latency = mc_latency + other_heads_data_return_time + dc_latency;
9090           u32 max_src_lines_per_dst_line, lb_fill_bw, line_fill_time;
9091           u32 tmp, dmif_size = 12288;
9092           fixed20_12 a, b, c;
9093 
9094           if (wm->num_heads == 0)
9095                     return 0;
9096 
9097           a.full = dfixed_const(2);
9098           b.full = dfixed_const(1);
9099           if ((wm->vsc.full > a.full) ||
9100               ((wm->vsc.full > b.full) && (wm->vtaps >= 3)) ||
9101               (wm->vtaps >= 5) ||
9102               ((wm->vsc.full >= a.full) && wm->interlaced))
9103                     max_src_lines_per_dst_line = 4;
9104           else
9105                     max_src_lines_per_dst_line = 2;
9106 
9107           a.full = dfixed_const(available_bandwidth);
9108           b.full = dfixed_const(wm->num_heads);
9109           a.full = dfixed_div(a, b);
9110           tmp = div_u64((u64) dmif_size * (u64) wm->disp_clk, mc_latency + 512);
9111           tmp = min(dfixed_trunc(a), tmp);
9112 
9113           lb_fill_bw = min(tmp, wm->disp_clk * wm->bytes_per_pixel / 1000);
9114 
9115           a.full = dfixed_const(max_src_lines_per_dst_line * wm->src_width * wm->bytes_per_pixel);
9116           b.full = dfixed_const(1000);
9117           c.full = dfixed_const(lb_fill_bw);
9118           b.full = dfixed_div(c, b);
9119           a.full = dfixed_div(a, b);
9120           line_fill_time = dfixed_trunc(a);
9121 
9122           if (line_fill_time < wm->active_time)
9123                     return latency;
9124           else
9125                     return latency + (line_fill_time - wm->active_time);
9126 
9127 }
9128 
9129 /**
9130  * dce8_average_bandwidth_vs_dram_bandwidth_for_display - check
9131  * average and available dram bandwidth
9132  *
9133  * @wm: watermark calculation data
9134  *
9135  * Check if the display average bandwidth fits in the display
9136  * dram bandwidth (CIK).
9137  * Used for display watermark bandwidth calculations
9138  * Returns true if the display fits, false if not.
9139  */
dce8_average_bandwidth_vs_dram_bandwidth_for_display(struct dce8_wm_params * wm)9140 static bool dce8_average_bandwidth_vs_dram_bandwidth_for_display(struct dce8_wm_params *wm)
9141 {
9142           if (dce8_average_bandwidth(wm) <=
9143               (dce8_dram_bandwidth_for_display(wm) / wm->num_heads))
9144                     return true;
9145           else
9146                     return false;
9147 }
9148 
9149 /**
9150  * dce8_average_bandwidth_vs_available_bandwidth - check
9151  * average and available bandwidth
9152  *
9153  * @wm: watermark calculation data
9154  *
9155  * Check if the display average bandwidth fits in the display
9156  * available bandwidth (CIK).
9157  * Used for display watermark bandwidth calculations
9158  * Returns true if the display fits, false if not.
9159  */
dce8_average_bandwidth_vs_available_bandwidth(struct dce8_wm_params * wm)9160 static bool dce8_average_bandwidth_vs_available_bandwidth(struct dce8_wm_params *wm)
9161 {
9162           if (dce8_average_bandwidth(wm) <=
9163               (dce8_available_bandwidth(wm) / wm->num_heads))
9164                     return true;
9165           else
9166                     return false;
9167 }
9168 
9169 /**
9170  * dce8_check_latency_hiding - check latency hiding
9171  *
9172  * @wm: watermark calculation data
9173  *
9174  * Check latency hiding (CIK).
9175  * Used for display watermark bandwidth calculations
9176  * Returns true if the display fits, false if not.
9177  */
dce8_check_latency_hiding(struct dce8_wm_params * wm)9178 static bool dce8_check_latency_hiding(struct dce8_wm_params *wm)
9179 {
9180           u32 lb_partitions = wm->lb_size / wm->src_width;
9181           u32 line_time = wm->active_time + wm->blank_time;
9182           u32 latency_tolerant_lines;
9183           u32 latency_hiding;
9184           fixed20_12 a;
9185 
9186           a.full = dfixed_const(1);
9187           if (wm->vsc.full > a.full)
9188                     latency_tolerant_lines = 1;
9189           else {
9190                     if (lb_partitions <= (wm->vtaps + 1))
9191                               latency_tolerant_lines = 1;
9192                     else
9193                               latency_tolerant_lines = 2;
9194           }
9195 
9196           latency_hiding = (latency_tolerant_lines * line_time + wm->blank_time);
9197 
9198           if (dce8_latency_watermark(wm) <= latency_hiding)
9199                     return true;
9200           else
9201                     return false;
9202 }
9203 
9204 /**
9205  * dce8_program_watermarks - program display watermarks
9206  *
9207  * @rdev: radeon_device pointer
9208  * @radeon_crtc: the selected display controller
9209  * @lb_size: line buffer size
9210  * @num_heads: number of display controllers in use
9211  *
9212  * Calculate and program the display watermarks for the
9213  * selected display controller (CIK).
9214  */
dce8_program_watermarks(struct radeon_device * rdev,struct radeon_crtc * radeon_crtc,u32 lb_size,u32 num_heads)9215 static void dce8_program_watermarks(struct radeon_device *rdev,
9216                                             struct radeon_crtc *radeon_crtc,
9217                                             u32 lb_size, u32 num_heads)
9218 {
9219           struct drm_display_mode *mode = &radeon_crtc->base.mode;
9220           struct dce8_wm_params wm_low, wm_high;
9221           u32 active_time;
9222           u32 line_time = 0;
9223           u32 latency_watermark_a = 0, latency_watermark_b = 0;
9224           u32 tmp, wm_mask;
9225 
9226           if (radeon_crtc->base.enabled && num_heads && mode) {
9227                     active_time = (u32) div_u64((u64)mode->crtc_hdisplay * 1000000,
9228                                                       (u32)mode->clock);
9229                     line_time = (u32) div_u64((u64)mode->crtc_htotal * 1000000,
9230                                                     (u32)mode->clock);
9231                     line_time = min(line_time, (u32)65535);
9232 
9233                     /* watermark for high clocks */
9234                     if ((rdev->pm.pm_method == PM_METHOD_DPM) &&
9235                         rdev->pm.dpm_enabled) {
9236                               wm_high.yclk =
9237                                         radeon_dpm_get_mclk(rdev, false) * 10;
9238                               wm_high.sclk =
9239                                         radeon_dpm_get_sclk(rdev, false) * 10;
9240                     } else {
9241                               wm_high.yclk = rdev->pm.current_mclk * 10;
9242                               wm_high.sclk = rdev->pm.current_sclk * 10;
9243                     }
9244 
9245                     wm_high.disp_clk = mode->clock;
9246                     wm_high.src_width = mode->crtc_hdisplay;
9247                     wm_high.active_time = active_time;
9248                     wm_high.blank_time = line_time - wm_high.active_time;
9249                     wm_high.interlaced = false;
9250                     if (mode->flags & DRM_MODE_FLAG_INTERLACE)
9251                               wm_high.interlaced = true;
9252                     wm_high.vsc = radeon_crtc->vsc;
9253                     wm_high.vtaps = 1;
9254                     if (radeon_crtc->rmx_type != RMX_OFF)
9255                               wm_high.vtaps = 2;
9256                     wm_high.bytes_per_pixel = 4; /* XXX: get this from fb config */
9257                     wm_high.lb_size = lb_size;
9258                     wm_high.dram_channels = cik_get_number_of_dram_channels(rdev);
9259                     wm_high.num_heads = num_heads;
9260 
9261                     /* set for high clocks */
9262                     latency_watermark_a = min(dce8_latency_watermark(&wm_high), (u32)65535);
9263 
9264                     /* possibly force display priority to high */
9265                     /* should really do this at mode validation time... */
9266                     if (!dce8_average_bandwidth_vs_dram_bandwidth_for_display(&wm_high) ||
9267                         !dce8_average_bandwidth_vs_available_bandwidth(&wm_high) ||
9268                         !dce8_check_latency_hiding(&wm_high) ||
9269                         (rdev->disp_priority == 2)) {
9270                               DRM_DEBUG_KMS("force priority to high\n");
9271                     }
9272 
9273                     /* watermark for low clocks */
9274                     if ((rdev->pm.pm_method == PM_METHOD_DPM) &&
9275                         rdev->pm.dpm_enabled) {
9276                               wm_low.yclk =
9277                                         radeon_dpm_get_mclk(rdev, true) * 10;
9278                               wm_low.sclk =
9279                                         radeon_dpm_get_sclk(rdev, true) * 10;
9280                     } else {
9281                               wm_low.yclk = rdev->pm.current_mclk * 10;
9282                               wm_low.sclk = rdev->pm.current_sclk * 10;
9283                     }
9284 
9285                     wm_low.disp_clk = mode->clock;
9286                     wm_low.src_width = mode->crtc_hdisplay;
9287                     wm_low.active_time = active_time;
9288                     wm_low.blank_time = line_time - wm_low.active_time;
9289                     wm_low.interlaced = false;
9290                     if (mode->flags & DRM_MODE_FLAG_INTERLACE)
9291                               wm_low.interlaced = true;
9292                     wm_low.vsc = radeon_crtc->vsc;
9293                     wm_low.vtaps = 1;
9294                     if (radeon_crtc->rmx_type != RMX_OFF)
9295                               wm_low.vtaps = 2;
9296                     wm_low.bytes_per_pixel = 4; /* XXX: get this from fb config */
9297                     wm_low.lb_size = lb_size;
9298                     wm_low.dram_channels = cik_get_number_of_dram_channels(rdev);
9299                     wm_low.num_heads = num_heads;
9300 
9301                     /* set for low clocks */
9302                     latency_watermark_b = min(dce8_latency_watermark(&wm_low), (u32)65535);
9303 
9304                     /* possibly force display priority to high */
9305                     /* should really do this at mode validation time... */
9306                     if (!dce8_average_bandwidth_vs_dram_bandwidth_for_display(&wm_low) ||
9307                         !dce8_average_bandwidth_vs_available_bandwidth(&wm_low) ||
9308                         !dce8_check_latency_hiding(&wm_low) ||
9309                         (rdev->disp_priority == 2)) {
9310                               DRM_DEBUG_KMS("force priority to high\n");
9311                     }
9312 
9313                     /* Save number of lines the linebuffer leads before the scanout */
9314                     radeon_crtc->lb_vblank_lead_lines = DIV_ROUND_UP(lb_size, mode->crtc_hdisplay);
9315           }
9316 
9317           /* select wm A */
9318           wm_mask = RREG32(DPG_WATERMARK_MASK_CONTROL + radeon_crtc->crtc_offset);
9319           tmp = wm_mask;
9320           tmp &= ~LATENCY_WATERMARK_MASK(3);
9321           tmp |= LATENCY_WATERMARK_MASK(1);
9322           WREG32(DPG_WATERMARK_MASK_CONTROL + radeon_crtc->crtc_offset, tmp);
9323           WREG32(DPG_PIPE_LATENCY_CONTROL + radeon_crtc->crtc_offset,
9324                  (LATENCY_LOW_WATERMARK(latency_watermark_a) |
9325                     LATENCY_HIGH_WATERMARK(line_time)));
9326           /* select wm B */
9327           tmp = RREG32(DPG_WATERMARK_MASK_CONTROL + radeon_crtc->crtc_offset);
9328           tmp &= ~LATENCY_WATERMARK_MASK(3);
9329           tmp |= LATENCY_WATERMARK_MASK(2);
9330           WREG32(DPG_WATERMARK_MASK_CONTROL + radeon_crtc->crtc_offset, tmp);
9331           WREG32(DPG_PIPE_LATENCY_CONTROL + radeon_crtc->crtc_offset,
9332                  (LATENCY_LOW_WATERMARK(latency_watermark_b) |
9333                     LATENCY_HIGH_WATERMARK(line_time)));
9334           /* restore original selection */
9335           WREG32(DPG_WATERMARK_MASK_CONTROL + radeon_crtc->crtc_offset, wm_mask);
9336 
9337           /* save values for DPM */
9338           radeon_crtc->line_time = line_time;
9339           radeon_crtc->wm_high = latency_watermark_a;
9340           radeon_crtc->wm_low = latency_watermark_b;
9341 }
9342 
9343 /**
9344  * dce8_bandwidth_update - program display watermarks
9345  *
9346  * @rdev: radeon_device pointer
9347  *
9348  * Calculate and program the display watermarks and line
9349  * buffer allocation (CIK).
9350  */
dce8_bandwidth_update(struct radeon_device * rdev)9351 void dce8_bandwidth_update(struct radeon_device *rdev)
9352 {
9353           struct drm_display_mode *mode = NULL;
9354           u32 num_heads = 0, lb_size;
9355           int i;
9356 
9357           if (!rdev->mode_info.mode_config_initialized)
9358                     return;
9359 
9360           radeon_update_display_priority(rdev);
9361 
9362           for (i = 0; i < rdev->num_crtc; i++) {
9363                     if (rdev->mode_info.crtcs[i]->base.enabled)
9364                               num_heads++;
9365           }
9366           for (i = 0; i < rdev->num_crtc; i++) {
9367                     mode = &rdev->mode_info.crtcs[i]->base.mode;
9368                     lb_size = dce8_line_buffer_adjust(rdev, rdev->mode_info.crtcs[i], mode);
9369                     dce8_program_watermarks(rdev, rdev->mode_info.crtcs[i], lb_size, num_heads);
9370           }
9371 }
9372 
9373 /**
9374  * cik_get_gpu_clock_counter - return GPU clock counter snapshot
9375  *
9376  * @rdev: radeon_device pointer
9377  *
9378  * Fetches a GPU clock counter snapshot (SI).
9379  * Returns the 64 bit clock counter snapshot.
9380  */
cik_get_gpu_clock_counter(struct radeon_device * rdev)9381 uint64_t cik_get_gpu_clock_counter(struct radeon_device *rdev)
9382 {
9383           uint64_t clock;
9384 
9385           mutex_lock(&rdev->gpu_clock_mutex);
9386           WREG32(RLC_CAPTURE_GPU_CLOCK_COUNT, 1);
9387           clock = (uint64_t)RREG32(RLC_GPU_CLOCK_COUNT_LSB) |
9388                     ((uint64_t)RREG32(RLC_GPU_CLOCK_COUNT_MSB) << 32ULL);
9389           mutex_unlock(&rdev->gpu_clock_mutex);
9390           return clock;
9391 }
9392 
/*
 * Program one UVD clock: look up the dividers for @clock, write the post
 * divider into @cntl_reg and poll @status_reg (up to 100 times, 10 ms
 * apart) for DCLK_STATUS.
 * Returns 0 on success, the divider lookup error, or -ETIMEDOUT.
 */
static int cik_set_uvd_clock(struct radeon_device *rdev, u32 clock,
			     u32 cntl_reg, u32 status_reg)
{
	struct atom_clock_dividers dividers;
	uint32_t cntl;
	int attempt;
	int err;

	err = radeon_atom_get_clock_dividers(rdev, COMPUTE_GPUCLK_INPUT_FLAG_DEFAULT_GPUCLK,
					     clock, false, &dividers);
	if (err)
		return err;

	cntl = RREG32_SMC(cntl_reg);
	cntl &= ~(DCLK_DIR_CNTL_EN|DCLK_DIVIDER_MASK);
	cntl |= dividers.post_divider;
	WREG32_SMC(cntl_reg, cntl);

	for (attempt = 0; attempt < 100; attempt++) {
		if (RREG32_SMC(status_reg) & DCLK_STATUS)
			return 0;
		mdelay(10);
	}

	return -ETIMEDOUT;
}
9420 
/*
 * Set the UVD clocks: program @vclk first and, only if that succeeds,
 * @dclk.  Returns 0 on success or the first error encountered.
 */
int cik_set_uvd_clocks(struct radeon_device *rdev, u32 vclk, u32 dclk)
{
	int err = cik_set_uvd_clock(rdev, vclk, CG_VCLK_CNTL, CG_VCLK_STATUS);

	if (err)
		return err;

	return cik_set_uvd_clock(rdev, dclk, CG_DCLK_CNTL, CG_DCLK_STATUS);
}
9432 
/*
 * Set the VCE clock: wait for ECLK_STATUS, write the post divider for
 * @ecclk into CG_ECLK_CNTL, then wait for ECLK_STATUS again.  Each wait
 * polls up to 100 times, 10 ms apart.  @evclk is not used here.
 * Returns 0 on success, the divider lookup error, or -ETIMEDOUT.
 */
int cik_set_vce_clocks(struct radeon_device *rdev, u32 evclk, u32 ecclk)
{
	struct atom_clock_dividers dividers;
	u32 cntl;
	int tries_left;
	int err;

	err = radeon_atom_get_clock_dividers(rdev, COMPUTE_GPUCLK_INPUT_FLAG_DEFAULT_GPUCLK,
					     ecclk, false, &dividers);
	if (err)
		return err;

	/* Wait for the status bit before touching the divider. */
	for (tries_left = 100; tries_left > 0; tries_left--) {
		if (RREG32_SMC(CG_ECLK_STATUS) & ECLK_STATUS)
			break;
		mdelay(10);
	}
	if (tries_left == 0)
		return -ETIMEDOUT;

	cntl = RREG32_SMC(CG_ECLK_CNTL);
	cntl &= ~(ECLK_DIR_CNTL_EN|ECLK_DIVIDER_MASK);
	cntl |= dividers.post_divider;
	WREG32_SMC(CG_ECLK_CNTL, cntl);

	/* Wait for the status bit again after the divider change. */
	for (tries_left = 100; tries_left > 0; tries_left--) {
		if (RREG32_SMC(CG_ECLK_STATUS) & ECLK_STATUS)
			break;
		mdelay(10);
	}
	if (tries_left == 0)
		return -ETIMEDOUT;

	return 0;
}
9467 
cik_pcie_gen3_enable(struct radeon_device * rdev)9468 static void cik_pcie_gen3_enable(struct radeon_device *rdev)
9469 {
9470           struct pci_dev *root = rdev->pdev->bus->self;
9471           int bridge_pos, gpu_pos;
9472           u32 speed_cntl, mask, current_data_rate;
9473           int ret, i;
9474           u16 tmp16;
9475 
9476 #if 0
9477           if (pci_is_root_bus(rdev->pdev->bus))
9478                     return;
9479 #endif
9480 
9481           if (radeon_pcie_gen2 == 0)
9482                     return;
9483 
9484           if (rdev->flags & RADEON_IS_IGP)
9485                     return;
9486 
9487           if (!(rdev->flags & RADEON_IS_PCIE))
9488                     return;
9489 
9490           ret = drm_pcie_get_speed_cap_mask(rdev->ddev, &mask);
9491           if (ret != 0)
9492                     return;
9493 
9494           if (!(mask & (DRM_PCIE_SPEED_50 | DRM_PCIE_SPEED_80)))
9495                     return;
9496 
9497           speed_cntl = RREG32_PCIE_PORT(PCIE_LC_SPEED_CNTL);
9498           current_data_rate = (speed_cntl & LC_CURRENT_DATA_RATE_MASK) >>
9499                     LC_CURRENT_DATA_RATE_SHIFT;
9500           if (mask & DRM_PCIE_SPEED_80) {
9501                     if (current_data_rate == 2) {
9502                               DRM_INFO("PCIE gen 3 link speeds already enabled\n");
9503                               return;
9504                     }
9505                     DRM_INFO("enabling PCIE gen 3 link speeds, disable with radeon.pcie_gen2=0\n");
9506           } else if (mask & DRM_PCIE_SPEED_50) {
9507                     if (current_data_rate == 1) {
9508                               DRM_INFO("PCIE gen 2 link speeds already enabled\n");
9509                               return;
9510                     }
9511                     DRM_INFO("enabling PCIE gen 2 link speeds, disable with radeon.pcie_gen2=0\n");
9512           }
9513 
9514           bridge_pos = pci_pcie_cap(root);
9515           if (!bridge_pos)
9516                     return;
9517 
9518           gpu_pos = pci_pcie_cap(rdev->pdev);
9519           if (!gpu_pos)
9520                     return;
9521 
9522           if (mask & DRM_PCIE_SPEED_80) {
9523                     /* re-try equalization if gen3 is not already enabled */
9524                     if (current_data_rate != 2) {
9525                               u16 bridge_cfg, gpu_cfg;
9526                               u16 bridge_cfg2, gpu_cfg2;
9527                               u32 max_lw, current_lw, tmp;
9528 
9529                               pci_read_config_word(root, bridge_pos + PCI_EXP_LNKCTL, &bridge_cfg);
9530                               pci_read_config_word(rdev->pdev, gpu_pos + PCI_EXP_LNKCTL, &gpu_cfg);
9531 
9532                               tmp16 = bridge_cfg | PCI_EXP_LNKCTL_HAWD;
9533                               pci_write_config_word(root, bridge_pos + PCI_EXP_LNKCTL, tmp16);
9534 
9535                               tmp16 = gpu_cfg | PCI_EXP_LNKCTL_HAWD;
9536                               pci_write_config_word(rdev->pdev, gpu_pos + PCI_EXP_LNKCTL, tmp16);
9537 
9538                               tmp = RREG32_PCIE_PORT(PCIE_LC_STATUS1);
9539                               max_lw = (tmp & LC_DETECTED_LINK_WIDTH_MASK) >> LC_DETECTED_LINK_WIDTH_SHIFT;
9540                               current_lw = (tmp & LC_OPERATING_LINK_WIDTH_MASK) >> LC_OPERATING_LINK_WIDTH_SHIFT;
9541 
9542                               if (current_lw < max_lw) {
9543                                         tmp = RREG32_PCIE_PORT(PCIE_LC_LINK_WIDTH_CNTL);
9544                                         if (tmp & LC_RENEGOTIATION_SUPPORT) {
9545                                                   tmp &= ~(LC_LINK_WIDTH_MASK | LC_UPCONFIGURE_DIS);
9546                                                   tmp |= (max_lw << LC_LINK_WIDTH_SHIFT);
9547                                                   tmp |= LC_UPCONFIGURE_SUPPORT | LC_RENEGOTIATE_EN | LC_RECONFIG_NOW;
9548                                                   WREG32_PCIE_PORT(PCIE_LC_LINK_WIDTH_CNTL, tmp);
9549                                         }
9550                               }
9551 
9552                               for (i = 0; i < 10; i++) {
9553                                         /* check status */
9554                                         pci_read_config_word(rdev->pdev, gpu_pos + PCI_EXP_DEVSTA, &tmp16);
9555                                         if (tmp16 & PCI_EXP_DEVSTA_TRPND)
9556                                                   break;
9557 
9558                                         pci_read_config_word(root, bridge_pos + PCI_EXP_LNKCTL, &bridge_cfg);
9559                                         pci_read_config_word(rdev->pdev, gpu_pos + PCI_EXP_LNKCTL, &gpu_cfg);
9560 
9561                                         pci_read_config_word(root, bridge_pos + PCI_EXP_LNKCTL2, &bridge_cfg2);
9562                                         pci_read_config_word(rdev->pdev, gpu_pos + PCI_EXP_LNKCTL2, &gpu_cfg2);
9563 
9564                                         tmp = RREG32_PCIE_PORT(PCIE_LC_CNTL4);
9565                                         tmp |= LC_SET_QUIESCE;
9566                                         WREG32_PCIE_PORT(PCIE_LC_CNTL4, tmp);
9567 
9568                                         tmp = RREG32_PCIE_PORT(PCIE_LC_CNTL4);
9569                                         tmp |= LC_REDO_EQ;
9570                                         WREG32_PCIE_PORT(PCIE_LC_CNTL4, tmp);
9571 
9572                                         mdelay(100);
9573 
9574                                         /* linkctl */
9575                                         pci_read_config_word(root, bridge_pos + PCI_EXP_LNKCTL, &tmp16);
9576                                         tmp16 &= ~PCI_EXP_LNKCTL_HAWD;
9577                                         tmp16 |= (bridge_cfg & PCI_EXP_LNKCTL_HAWD);
9578                                         pci_write_config_word(root, bridge_pos + PCI_EXP_LNKCTL, tmp16);
9579 
9580                                         pci_read_config_word(rdev->pdev, gpu_pos + PCI_EXP_LNKCTL, &tmp16);
9581                                         tmp16 &= ~PCI_EXP_LNKCTL_HAWD;
9582                                         tmp16 |= (gpu_cfg & PCI_EXP_LNKCTL_HAWD);
9583                                         pci_write_config_word(rdev->pdev, gpu_pos + PCI_EXP_LNKCTL, tmp16);
9584 
9585                                         /* linkctl2 */
9586                                         pci_read_config_word(root, bridge_pos + PCI_EXP_LNKCTL2, &tmp16);
9587                                         tmp16 &= ~((1 << 4) | (7 << 9));
9588                                         tmp16 |= (bridge_cfg2 & ((1 << 4) | (7 << 9)));
9589                                         pci_write_config_word(root, bridge_pos + PCI_EXP_LNKCTL2, tmp16);
9590 
9591                                         pci_read_config_word(rdev->pdev, gpu_pos + PCI_EXP_LNKCTL2, &tmp16);
9592                                         tmp16 &= ~((1 << 4) | (7 << 9));
9593                                         tmp16 |= (gpu_cfg2 & ((1 << 4) | (7 << 9)));
9594                                         pci_write_config_word(rdev->pdev, gpu_pos + PCI_EXP_LNKCTL2, tmp16);
9595 
9596                                         tmp = RREG32_PCIE_PORT(PCIE_LC_CNTL4);
9597                                         tmp &= ~LC_SET_QUIESCE;
9598                                         WREG32_PCIE_PORT(PCIE_LC_CNTL4, tmp);
9599                               }
9600                     }
9601           }
9602 
9603           /* set the link speed */
9604           speed_cntl |= LC_FORCE_EN_SW_SPEED_CHANGE | LC_FORCE_DIS_HW_SPEED_CHANGE;
9605           speed_cntl &= ~LC_FORCE_DIS_SW_SPEED_CHANGE;
9606           WREG32_PCIE_PORT(PCIE_LC_SPEED_CNTL, speed_cntl);
9607 
9608           pci_read_config_word(rdev->pdev, gpu_pos + PCI_EXP_LNKCTL2, &tmp16);
9609           tmp16 &= ~0xf;
9610           if (mask & DRM_PCIE_SPEED_80)
9611                     tmp16 |= 3; /* gen3 */
9612           else if (mask & DRM_PCIE_SPEED_50)
9613                     tmp16 |= 2; /* gen2 */
9614           else
9615                     tmp16 |= 1; /* gen1 */
9616           pci_write_config_word(rdev->pdev, gpu_pos + PCI_EXP_LNKCTL2, tmp16);
9617 
9618           speed_cntl = RREG32_PCIE_PORT(PCIE_LC_SPEED_CNTL);
9619           speed_cntl |= LC_INITIATE_LINK_SPEED_CHANGE;
9620           WREG32_PCIE_PORT(PCIE_LC_SPEED_CNTL, speed_cntl);
9621 
9622           for (i = 0; i < rdev->usec_timeout; i++) {
9623                     speed_cntl = RREG32_PCIE_PORT(PCIE_LC_SPEED_CNTL);
9624                     if ((speed_cntl & LC_INITIATE_LINK_SPEED_CHANGE) == 0)
9625                               break;
9626                     udelay(1);
9627           }
9628 }
9629 
cik_program_aspm(struct radeon_device * rdev)9630 static void cik_program_aspm(struct radeon_device *rdev)
9631 {
9632           u32 data, orig;
9633           bool disable_l0s = false, disable_l1 = false, disable_plloff_in_l1 = false;
9634 #if 0
9635           bool disable_clkreq = false;
9636 #endif
9637 
9638           if (radeon_aspm == 0)
9639                     return;
9640 
9641           /* XXX double check IGPs */
9642           if (rdev->flags & RADEON_IS_IGP)
9643                     return;
9644 
9645           if (!(rdev->flags & RADEON_IS_PCIE))
9646                     return;
9647 
9648           orig = data = RREG32_PCIE_PORT(PCIE_LC_N_FTS_CNTL);
9649           data &= ~LC_XMIT_N_FTS_MASK;
9650           data |= LC_XMIT_N_FTS(0x24) | LC_XMIT_N_FTS_OVERRIDE_EN;
9651           if (orig != data)
9652                     WREG32_PCIE_PORT(PCIE_LC_N_FTS_CNTL, data);
9653 
9654           orig = data = RREG32_PCIE_PORT(PCIE_LC_CNTL3);
9655           data |= LC_GO_TO_RECOVERY;
9656           if (orig != data)
9657                     WREG32_PCIE_PORT(PCIE_LC_CNTL3, data);
9658 
9659           orig = data = RREG32_PCIE_PORT(PCIE_P_CNTL);
9660           data |= P_IGNORE_EDB_ERR;
9661           if (orig != data)
9662                     WREG32_PCIE_PORT(PCIE_P_CNTL, data);
9663 
9664           orig = data = RREG32_PCIE_PORT(PCIE_LC_CNTL);
9665           data &= ~(LC_L0S_INACTIVITY_MASK | LC_L1_INACTIVITY_MASK);
9666           data |= LC_PMI_TO_L1_DIS;
9667           if (!disable_l0s)
9668                     data |= LC_L0S_INACTIVITY(7);
9669 
9670           if (!disable_l1) {
9671                     data |= LC_L1_INACTIVITY(7);
9672                     data &= ~LC_PMI_TO_L1_DIS;
9673                     if (orig != data)
9674                               WREG32_PCIE_PORT(PCIE_LC_CNTL, data);
9675 
9676                     if (!disable_plloff_in_l1) {
9677                               bool clk_req_support;
9678 
9679                               orig = data = RREG32_PCIE_PORT(PB0_PIF_PWRDOWN_0);
9680                               data &= ~(PLL_POWER_STATE_IN_OFF_0_MASK | PLL_POWER_STATE_IN_TXS2_0_MASK);
9681                               data |= PLL_POWER_STATE_IN_OFF_0(7) | PLL_POWER_STATE_IN_TXS2_0(7);
9682                               if (orig != data)
9683                                         WREG32_PCIE_PORT(PB0_PIF_PWRDOWN_0, data);
9684 
9685                               orig = data = RREG32_PCIE_PORT(PB0_PIF_PWRDOWN_1);
9686                               data &= ~(PLL_POWER_STATE_IN_OFF_1_MASK | PLL_POWER_STATE_IN_TXS2_1_MASK);
9687                               data |= PLL_POWER_STATE_IN_OFF_1(7) | PLL_POWER_STATE_IN_TXS2_1(7);
9688                               if (orig != data)
9689                                         WREG32_PCIE_PORT(PB0_PIF_PWRDOWN_1, data);
9690 
9691                               orig = data = RREG32_PCIE_PORT(PB1_PIF_PWRDOWN_0);
9692                               data &= ~(PLL_POWER_STATE_IN_OFF_0_MASK | PLL_POWER_STATE_IN_TXS2_0_MASK);
9693                               data |= PLL_POWER_STATE_IN_OFF_0(7) | PLL_POWER_STATE_IN_TXS2_0(7);
9694                               if (orig != data)
9695                                         WREG32_PCIE_PORT(PB1_PIF_PWRDOWN_0, data);
9696 
9697                               orig = data = RREG32_PCIE_PORT(PB1_PIF_PWRDOWN_1);
9698                               data &= ~(PLL_POWER_STATE_IN_OFF_1_MASK | PLL_POWER_STATE_IN_TXS2_1_MASK);
9699                               data |= PLL_POWER_STATE_IN_OFF_1(7) | PLL_POWER_STATE_IN_TXS2_1(7);
9700                               if (orig != data)
9701                                         WREG32_PCIE_PORT(PB1_PIF_PWRDOWN_1, data);
9702 
9703                               orig = data = RREG32_PCIE_PORT(PCIE_LC_LINK_WIDTH_CNTL);
9704                               data &= ~LC_DYN_LANES_PWR_STATE_MASK;
9705                               data |= LC_DYN_LANES_PWR_STATE(3);
9706                               if (orig != data)
9707                                         WREG32_PCIE_PORT(PCIE_LC_LINK_WIDTH_CNTL, data);
9708 
9709 #ifdef zMN_TODO
9710                               if (!disable_clkreq &&
9711                                   !pci_is_root_bus(rdev->pdev->bus)) {
9712                                         struct pci_dev *root = rdev->pdev->bus->self;
9713                                         u32 lnkcap;
9714 
9715                                         clk_req_support = false;
9716                                         pcie_capability_read_dword(root, PCI_EXP_LNKCAP, &lnkcap);
9717                                         if (lnkcap & PCI_EXP_LNKCAP_CLKPM)
9718                                                   clk_req_support = true;
9719                               } else {
9720                                         clk_req_support = false;
9721                               }
9722 #else
9723                               clk_req_support = false;
9724 #endif
9725 
9726                               if (clk_req_support) {
9727                                         orig = data = RREG32_PCIE_PORT(PCIE_LC_CNTL2);
9728                                         data |= LC_ALLOW_PDWN_IN_L1 | LC_ALLOW_PDWN_IN_L23;
9729                                         if (orig != data)
9730                                                   WREG32_PCIE_PORT(PCIE_LC_CNTL2, data);
9731 
9732                                         orig = data = RREG32_SMC(THM_CLK_CNTL);
9733                                         data &= ~(CMON_CLK_SEL_MASK | TMON_CLK_SEL_MASK);
9734                                         data |= CMON_CLK_SEL(1) | TMON_CLK_SEL(1);
9735                                         if (orig != data)
9736                                                   WREG32_SMC(THM_CLK_CNTL, data);
9737 
9738                                         orig = data = RREG32_SMC(MISC_CLK_CTRL);
9739                                         data &= ~(DEEP_SLEEP_CLK_SEL_MASK | ZCLK_SEL_MASK);
9740                                         data |= DEEP_SLEEP_CLK_SEL(1) | ZCLK_SEL(1);
9741                                         if (orig != data)
9742                                                   WREG32_SMC(MISC_CLK_CTRL, data);
9743 
9744                                         orig = data = RREG32_SMC(CG_CLKPIN_CNTL);
9745                                         data &= ~BCLK_AS_XCLK;
9746                                         if (orig != data)
9747                                                   WREG32_SMC(CG_CLKPIN_CNTL, data);
9748 
9749                                         orig = data = RREG32_SMC(CG_CLKPIN_CNTL_2);
9750                                         data &= ~FORCE_BIF_REFCLK_EN;
9751                                         if (orig != data)
9752                                                   WREG32_SMC(CG_CLKPIN_CNTL_2, data);
9753 
9754                                         orig = data = RREG32_SMC(MPLL_BYPASSCLK_SEL);
9755                                         data &= ~MPLL_CLKOUT_SEL_MASK;
9756                                         data |= MPLL_CLKOUT_SEL(4);
9757                                         if (orig != data)
9758                                                   WREG32_SMC(MPLL_BYPASSCLK_SEL, data);
9759                               }
9760                     }
9761           } else {
9762                     if (orig != data)
9763                               WREG32_PCIE_PORT(PCIE_LC_CNTL, data);
9764           }
9765 
9766           orig = data = RREG32_PCIE_PORT(PCIE_CNTL2);
9767           data |= SLV_MEM_LS_EN | MST_MEM_LS_EN | REPLAY_MEM_LS_EN;
9768           if (orig != data)
9769                     WREG32_PCIE_PORT(PCIE_CNTL2, data);
9770 
9771           if (!disable_l0s) {
9772                     data = RREG32_PCIE_PORT(PCIE_LC_N_FTS_CNTL);
9773                     if((data & LC_N_FTS_MASK) == LC_N_FTS_MASK) {
9774                               data = RREG32_PCIE_PORT(PCIE_LC_STATUS1);
9775                               if ((data & LC_REVERSE_XMIT) && (data & LC_REVERSE_RCVR)) {
9776                                         orig = data = RREG32_PCIE_PORT(PCIE_LC_CNTL);
9777                                         data &= ~LC_L0S_INACTIVITY_MASK;
9778                                         if (orig != data)
9779                                                   WREG32_PCIE_PORT(PCIE_LC_CNTL, data);
9780                               }
9781                     }
9782           }
9783 }
9784