xref: /dragonfly/sys/dev/drm/radeon/evergreen_cs.c (revision a85cb24f18e3804e75ab8bcda7692564d0563317)
1 /*
2  * Copyright 2010 Advanced Micro Devices, Inc.
3  * Copyright 2008 Red Hat Inc.
4  * Copyright 2009 Jerome Glisse.
5  *
6  * Permission is hereby granted, free of charge, to any person obtaining a
7  * copy of this software and associated documentation files (the "Software"),
8  * to deal in the Software without restriction, including without limitation
9  * the rights to use, copy, modify, merge, publish, distribute, sublicense,
10  * and/or sell copies of the Software, and to permit persons to whom the
11  * Software is furnished to do so, subject to the following conditions:
12  *
13  * The above copyright notice and this permission notice shall be included in
14  * all copies or substantial portions of the Software.
15  *
16  * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
17  * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
18  * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT.  IN NO EVENT SHALL
19  * THE COPYRIGHT HOLDER(S) OR AUTHOR(S) BE LIABLE FOR ANY CLAIM, DAMAGES OR
20  * OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE,
21  * ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR
22  * OTHER DEALINGS IN THE SOFTWARE.
23  *
24  * Authors: Dave Airlie
25  *          Alex Deucher
26  *          Jerome Glisse
27  */
28 #include <drm/drmP.h>
29 #include "radeon.h"
30 #include "radeon_asic.h"
31 #include "evergreend.h"
32 #include "evergreen_reg_safe.h"
33 #include "cayman_reg_safe.h"
34 
/*
 * Two-argument maximum / minimum.
 * Each argument may be evaluated twice, so do not pass expressions with
 * side effects.
 */
#define MAX(a, b)	(((a) > (b)) ? (a) : (b))
#define MIN(a, b)	(((a) < (b)) ? (a) : (b))

/* Number of entries in the evergreen_reg_safe_bm table. */
#define REG_SAFE_BM_SIZE	ARRAY_SIZE(evergreen_reg_safe_bm)
39 
40 struct evergreen_cs_track {
41           u32                           group_size;
42           u32                           nbanks;
43           u32                           npipes;
44           u32                           row_size;
45           /* value we track */
46           u32                           nsamples;           /* unused */
47           struct radeon_bo    *cb_color_bo[12];
48           u32                           cb_color_bo_offset[12];
49           struct radeon_bo    *cb_color_fmask_bo[8];        /* unused */
50           struct radeon_bo    *cb_color_cmask_bo[8];        /* unused */
51           u32                           cb_color_info[12];
52           u32                           cb_color_view[12];
53           u32                           cb_color_pitch[12];
54           u32                           cb_color_slice[12];
55           u32                           cb_color_slice_idx[12];
56           u32                           cb_color_attrib[12];
57           u32                           cb_color_cmask_slice[8];/* unused */
58           u32                           cb_color_fmask_slice[8];/* unused */
59           u32                           cb_target_mask;
60           u32                           cb_shader_mask; /* unused */
61           u32                           vgt_strmout_config;
62           u32                           vgt_strmout_buffer_config;
63           struct radeon_bo    *vgt_strmout_bo[4];
64           u32                           vgt_strmout_bo_offset[4];
65           u32                           vgt_strmout_size[4];
66           u32                           db_depth_control;
67           u32                           db_depth_view;
68           u32                           db_depth_slice;
69           u32                           db_depth_size;
70           u32                           db_z_info;
71           u32                           db_z_read_offset;
72           u32                           db_z_write_offset;
73           struct radeon_bo    *db_z_read_bo;
74           struct radeon_bo    *db_z_write_bo;
75           u32                           db_s_info;
76           u32                           db_s_read_offset;
77           u32                           db_s_write_offset;
78           struct radeon_bo    *db_s_read_bo;
79           struct radeon_bo    *db_s_write_bo;
80           bool                          sx_misc_kill_all_prims;
81           bool                          cb_dirty;
82           bool                          db_dirty;
83           bool                          streamout_dirty;
84           u32                           htile_offset;
85           u32                           htile_surface;
86           struct radeon_bo    *htile_bo;
87           unsigned long                 indirect_draw_buffer_size;
88           const unsigned                *reg_safe_bm;
89 };
90 
/*
 * Translate buffer tiling flags into an ARRAY_* mode value.
 *
 * RADEON_TILING_MACRO is tested first and wins over RADEON_TILING_MICRO;
 * when neither flag is set the result is ARRAY_LINEAR_GENERAL.
 */
static u32 evergreen_cs_get_aray_mode(u32 tiling_flags)
{
	if (tiling_flags & RADEON_TILING_MACRO)
		return ARRAY_2D_TILED_THIN1;

	if (tiling_flags & RADEON_TILING_MICRO)
		return ARRAY_1D_TILED_THIN1;

	return ARRAY_LINEAR_GENERAL;
}
100 
/*
 * Translate a bank count (2, 4, 8 or 16) into the matching ADDR_SURF_*_BANK
 * value.  Any other count is treated like 8 banks.
 */
static u32 evergreen_cs_get_num_banks(u32 nbanks)
{
	if (nbanks == 2)
		return ADDR_SURF_2_BANK;
	if (nbanks == 4)
		return ADDR_SURF_4_BANK;
	if (nbanks == 16)
		return ADDR_SURF_16_BANK;

	/* 8 banks, and the fallback for every unrecognised count */
	return ADDR_SURF_8_BANK;
}
115 
evergreen_cs_track_init(struct evergreen_cs_track * track)116 static void evergreen_cs_track_init(struct evergreen_cs_track *track)
117 {
118           int i;
119 
120           for (i = 0; i < 8; i++) {
121                     track->cb_color_fmask_bo[i] = NULL;
122                     track->cb_color_cmask_bo[i] = NULL;
123                     track->cb_color_cmask_slice[i] = 0;
124                     track->cb_color_fmask_slice[i] = 0;
125           }
126 
127           for (i = 0; i < 12; i++) {
128                     track->cb_color_bo[i] = NULL;
129                     track->cb_color_bo_offset[i] = 0xFFFFFFFF;
130                     track->cb_color_info[i] = 0;
131                     track->cb_color_view[i] = 0xFFFFFFFF;
132                     track->cb_color_pitch[i] = 0;
133                     track->cb_color_slice[i] = 0xfffffff;
134                     track->cb_color_slice_idx[i] = 0;
135           }
136           track->cb_target_mask = 0xFFFFFFFF;
137           track->cb_shader_mask = 0xFFFFFFFF;
138           track->cb_dirty = true;
139 
140           track->db_depth_slice = 0xffffffff;
141           track->db_depth_view = 0xFFFFC000;
142           track->db_depth_size = 0xFFFFFFFF;
143           track->db_depth_control = 0xFFFFFFFF;
144           track->db_z_info = 0xFFFFFFFF;
145           track->db_z_read_offset = 0xFFFFFFFF;
146           track->db_z_write_offset = 0xFFFFFFFF;
147           track->db_z_read_bo = NULL;
148           track->db_z_write_bo = NULL;
149           track->db_s_info = 0xFFFFFFFF;
150           track->db_s_read_offset = 0xFFFFFFFF;
151           track->db_s_write_offset = 0xFFFFFFFF;
152           track->db_s_read_bo = NULL;
153           track->db_s_write_bo = NULL;
154           track->db_dirty = true;
155           track->htile_bo = NULL;
156           track->htile_offset = 0xFFFFFFFF;
157           track->htile_surface = 0;
158 
159           for (i = 0; i < 4; i++) {
160                     track->vgt_strmout_size[i] = 0;
161                     track->vgt_strmout_bo[i] = NULL;
162                     track->vgt_strmout_bo_offset[i] = 0xFFFFFFFF;
163           }
164           track->streamout_dirty = true;
165           track->sx_misc_kill_all_prims = false;
166 }
167 
/*
 * Description of one surface being validated.
 *
 * The first group is filled in by the caller from command stream registers;
 * the second group is written by evergreen_surface_check() and the
 * per-array-mode helpers it dispatches to.
 */
struct eg_surface {
	/* inputs gathered from the command stream */
	unsigned	nbx;		/* reported as "pitch" in diagnostics */
	unsigned	nby;		/* reported as "height" in diagnostics */
	unsigned	format;
	unsigned	mode;		/* ARRAY_* array mode */
	unsigned	nbanks;
	unsigned	bankw;
	unsigned	bankh;
	unsigned	tsplit;
	unsigned	mtilea;
	unsigned	nsamples;

	/* outputs */
	unsigned	bpe;		/* r600_fmt_get_blocksize(format) */
	unsigned	layer_size;
	unsigned	palign;		/* required alignment of nbx */
	unsigned	halign;		/* required alignment of nby */
	unsigned long	base_align;	/* required alignment of the base offset */
};
187 
evergreen_surface_check_linear(struct radeon_cs_parser * p,struct eg_surface * surf,const char * prefix)188 static int evergreen_surface_check_linear(struct radeon_cs_parser *p,
189                                                     struct eg_surface *surf,
190                                                     const char *prefix)
191 {
192           surf->layer_size = surf->nbx * surf->nby * surf->bpe * surf->nsamples;
193           surf->base_align = surf->bpe;
194           surf->palign = 1;
195           surf->halign = 1;
196           return 0;
197 }
198 
evergreen_surface_check_linear_aligned(struct radeon_cs_parser * p,struct eg_surface * surf,const char * prefix)199 static int evergreen_surface_check_linear_aligned(struct radeon_cs_parser *p,
200                                                               struct eg_surface *surf,
201                                                               const char *prefix)
202 {
203           struct evergreen_cs_track *track = p->track;
204           unsigned palign;
205 
206           palign = MAX(64, track->group_size / surf->bpe);
207           surf->layer_size = surf->nbx * surf->nby * surf->bpe * surf->nsamples;
208           surf->base_align = track->group_size;
209           surf->palign = palign;
210           surf->halign = 1;
211           if (surf->nbx & (palign - 1)) {
212                     if (prefix) {
213                               dev_warn(p->dev, "%s:%d %s pitch %d invalid must be aligned with %d\n",
214                                          __func__, __LINE__, prefix, surf->nbx, palign);
215                     }
216                     return -EINVAL;
217           }
218           return 0;
219 }
220 
evergreen_surface_check_1d(struct radeon_cs_parser * p,struct eg_surface * surf,const char * prefix)221 static int evergreen_surface_check_1d(struct radeon_cs_parser *p,
222                                               struct eg_surface *surf,
223                                               const char *prefix)
224 {
225           struct evergreen_cs_track *track = p->track;
226           unsigned palign;
227 
228           palign = track->group_size / (8 * surf->bpe * surf->nsamples);
229           palign = MAX(8, palign);
230           surf->layer_size = surf->nbx * surf->nby * surf->bpe;
231           surf->base_align = track->group_size;
232           surf->palign = palign;
233           surf->halign = 8;
234           if ((surf->nbx & (palign - 1))) {
235                     if (prefix) {
236                               dev_warn(p->dev, "%s:%d %s pitch %d invalid must be aligned with %d (%d %d %d)\n",
237                                          __func__, __LINE__, prefix, surf->nbx, palign,
238                                          track->group_size, surf->bpe, surf->nsamples);
239                     }
240                     return -EINVAL;
241           }
242           if ((surf->nby & (8 - 1))) {
243                     if (prefix) {
244                               dev_warn(p->dev, "%s:%d %s height %d invalid must be aligned with 8\n",
245                                          __func__, __LINE__, prefix, surf->nby);
246                     }
247                     return -EINVAL;
248           }
249           return 0;
250 }
251 
evergreen_surface_check_2d(struct radeon_cs_parser * p,struct eg_surface * surf,const char * prefix)252 static int evergreen_surface_check_2d(struct radeon_cs_parser *p,
253                                               struct eg_surface *surf,
254                                               const char *prefix)
255 {
256           struct evergreen_cs_track *track = p->track;
257           unsigned palign, halign, tileb, slice_pt;
258           unsigned mtile_pr, mtile_ps, mtileb;
259 
260           tileb = 64 * surf->bpe * surf->nsamples;
261           slice_pt = 1;
262           if (tileb > surf->tsplit) {
263                     slice_pt = tileb / surf->tsplit;
264           }
265           tileb = tileb / slice_pt;
266           /* macro tile width & height */
267           palign = (8 * surf->bankw * track->npipes) * surf->mtilea;
268           halign = (8 * surf->bankh * surf->nbanks) / surf->mtilea;
269           mtileb = (palign / 8) * (halign / 8) * tileb;
270           mtile_pr = surf->nbx / palign;
271           mtile_ps = (mtile_pr * surf->nby) / halign;
272           surf->layer_size = mtile_ps * mtileb * slice_pt;
273           surf->base_align = (palign / 8) * (halign / 8) * tileb;
274           surf->palign = palign;
275           surf->halign = halign;
276 
277           if ((surf->nbx & (palign - 1))) {
278                     if (prefix) {
279                               dev_warn(p->dev, "%s:%d %s pitch %d invalid must be aligned with %d\n",
280                                          __func__, __LINE__, prefix, surf->nbx, palign);
281                     }
282                     return -EINVAL;
283           }
284           if ((surf->nby & (halign - 1))) {
285                     if (prefix) {
286                               dev_warn(p->dev, "%s:%d %s height %d invalid must be aligned with %d\n",
287                                          __func__, __LINE__, prefix, surf->nby, halign);
288                     }
289                     return -EINVAL;
290           }
291 
292           return 0;
293 }
294 
evergreen_surface_check(struct radeon_cs_parser * p,struct eg_surface * surf,const char * prefix)295 static int evergreen_surface_check(struct radeon_cs_parser *p,
296                                            struct eg_surface *surf,
297                                            const char *prefix)
298 {
299           /* some common value computed here */
300           surf->bpe = r600_fmt_get_blocksize(surf->format);
301 
302           switch (surf->mode) {
303           case ARRAY_LINEAR_GENERAL:
304                     return evergreen_surface_check_linear(p, surf, prefix);
305           case ARRAY_LINEAR_ALIGNED:
306                     return evergreen_surface_check_linear_aligned(p, surf, prefix);
307           case ARRAY_1D_TILED_THIN1:
308                     return evergreen_surface_check_1d(p, surf, prefix);
309           case ARRAY_2D_TILED_THIN1:
310                     return evergreen_surface_check_2d(p, surf, prefix);
311           default:
312                     dev_warn(p->dev, "%s:%d %s invalid array mode %d\n",
313                                         __func__, __LINE__, prefix, surf->mode);
314                     return -EINVAL;
315           }
316           return -EINVAL;
317 }
318 
evergreen_surface_value_conv_check(struct radeon_cs_parser * p,struct eg_surface * surf,const char * prefix)319 static int evergreen_surface_value_conv_check(struct radeon_cs_parser *p,
320                                                         struct eg_surface *surf,
321                                                         const char *prefix)
322 {
323           switch (surf->mode) {
324           case ARRAY_2D_TILED_THIN1:
325                     break;
326           case ARRAY_LINEAR_GENERAL:
327           case ARRAY_LINEAR_ALIGNED:
328           case ARRAY_1D_TILED_THIN1:
329                     return 0;
330           default:
331                     dev_warn(p->dev, "%s:%d %s invalid array mode %d\n",
332                                         __func__, __LINE__, prefix, surf->mode);
333                     return -EINVAL;
334           }
335 
336           switch (surf->nbanks) {
337           case 0: surf->nbanks = 2; break;
338           case 1: surf->nbanks = 4; break;
339           case 2: surf->nbanks = 8; break;
340           case 3: surf->nbanks = 16; break;
341           default:
342                     dev_warn(p->dev, "%s:%d %s invalid number of banks %d\n",
343                                __func__, __LINE__, prefix, surf->nbanks);
344                     return -EINVAL;
345           }
346           switch (surf->bankw) {
347           case 0: surf->bankw = 1; break;
348           case 1: surf->bankw = 2; break;
349           case 2: surf->bankw = 4; break;
350           case 3: surf->bankw = 8; break;
351           default:
352                     dev_warn(p->dev, "%s:%d %s invalid bankw %d\n",
353                                __func__, __LINE__, prefix, surf->bankw);
354                     return -EINVAL;
355           }
356           switch (surf->bankh) {
357           case 0: surf->bankh = 1; break;
358           case 1: surf->bankh = 2; break;
359           case 2: surf->bankh = 4; break;
360           case 3: surf->bankh = 8; break;
361           default:
362                     dev_warn(p->dev, "%s:%d %s invalid bankh %d\n",
363                                __func__, __LINE__, prefix, surf->bankh);
364                     return -EINVAL;
365           }
366           switch (surf->mtilea) {
367           case 0: surf->mtilea = 1; break;
368           case 1: surf->mtilea = 2; break;
369           case 2: surf->mtilea = 4; break;
370           case 3: surf->mtilea = 8; break;
371           default:
372                     dev_warn(p->dev, "%s:%d %s invalid macro tile aspect %d\n",
373                                __func__, __LINE__, prefix, surf->mtilea);
374                     return -EINVAL;
375           }
376           switch (surf->tsplit) {
377           case 0: surf->tsplit = 64; break;
378           case 1: surf->tsplit = 128; break;
379           case 2: surf->tsplit = 256; break;
380           case 3: surf->tsplit = 512; break;
381           case 4: surf->tsplit = 1024; break;
382           case 5: surf->tsplit = 2048; break;
383           case 6: surf->tsplit = 4096; break;
384           default:
385                     dev_warn(p->dev, "%s:%d %s invalid tile split %d\n",
386                                __func__, __LINE__, prefix, surf->tsplit);
387                     return -EINVAL;
388           }
389           return 0;
390 }
391 
evergreen_cs_track_validate_cb(struct radeon_cs_parser * p,unsigned id)392 static int evergreen_cs_track_validate_cb(struct radeon_cs_parser *p, unsigned id)
393 {
394           struct evergreen_cs_track *track = p->track;
395           struct eg_surface surf;
396           unsigned pitch, slice, mslice;
397           unsigned long offset;
398           int r;
399 
400           mslice = G_028C6C_SLICE_MAX(track->cb_color_view[id]) + 1;
401           pitch = track->cb_color_pitch[id];
402           slice = track->cb_color_slice[id];
403           surf.nbx = (pitch + 1) * 8;
404           surf.nby = ((slice + 1) * 64) / surf.nbx;
405           surf.mode = G_028C70_ARRAY_MODE(track->cb_color_info[id]);
406           surf.format = G_028C70_FORMAT(track->cb_color_info[id]);
407           surf.tsplit = G_028C74_TILE_SPLIT(track->cb_color_attrib[id]);
408           surf.nbanks = G_028C74_NUM_BANKS(track->cb_color_attrib[id]);
409           surf.bankw = G_028C74_BANK_WIDTH(track->cb_color_attrib[id]);
410           surf.bankh = G_028C74_BANK_HEIGHT(track->cb_color_attrib[id]);
411           surf.mtilea = G_028C74_MACRO_TILE_ASPECT(track->cb_color_attrib[id]);
412           surf.nsamples = 1;
413 
414           if (!r600_fmt_is_valid_color(surf.format)) {
415                     dev_warn(p->dev, "%s:%d cb invalid format %d for %d (0x%08x)\n",
416                                __func__, __LINE__, surf.format,
417                               id, track->cb_color_info[id]);
418                     return -EINVAL;
419           }
420 
421           r = evergreen_surface_value_conv_check(p, &surf, "cb");
422           if (r) {
423                     return r;
424           }
425 
426           r = evergreen_surface_check(p, &surf, "cb");
427           if (r) {
428                     dev_warn(p->dev, "%s:%d cb[%d] invalid (0x%08x 0x%08x 0x%08x 0x%08x)\n",
429                                __func__, __LINE__, id, track->cb_color_pitch[id],
430                                track->cb_color_slice[id], track->cb_color_attrib[id],
431                                track->cb_color_info[id]);
432                     return r;
433           }
434 
435           offset = track->cb_color_bo_offset[id] << 8;
436           if (offset & (surf.base_align - 1)) {
437                     dev_warn(p->dev, "%s:%d cb[%d] bo base %ld not aligned with %ld\n",
438                                __func__, __LINE__, id, offset, surf.base_align);
439                     return -EINVAL;
440           }
441 
442           offset += surf.layer_size * mslice;
443           if (offset > radeon_bo_size(track->cb_color_bo[id])) {
444                     /* old ddx are broken they allocate bo with w*h*bpp but
445                      * program slice with ALIGN(h, 8), catch this and patch
446                      * command stream.
447                      */
448                     if (!surf.mode) {
449                               uint32_t *ib = p->ib.ptr;
450                               unsigned long tmp, nby, bsize, size, min = 0;
451 
452                               /* find the height the ddx wants */
453                               if (surf.nby > 8) {
454                                         min = surf.nby - 8;
455                               }
456                               bsize = radeon_bo_size(track->cb_color_bo[id]);
457                               tmp = track->cb_color_bo_offset[id] << 8;
458                               for (nby = surf.nby; nby > min; nby--) {
459                                         size = nby * surf.nbx * surf.bpe * surf.nsamples;
460                                         if ((tmp + size * mslice) <= bsize) {
461                                                   break;
462                                         }
463                               }
464                               if (nby > min) {
465                                         surf.nby = nby;
466                                         slice = ((nby * surf.nbx) / 64) - 1;
467                                         if (!evergreen_surface_check(p, &surf, "cb")) {
468                                                   /* check if this one works */
469                                                   tmp += surf.layer_size * mslice;
470                                                   if (tmp <= bsize) {
471                                                             ib[track->cb_color_slice_idx[id]] = slice;
472                                                             goto old_ddx_ok;
473                                                   }
474                                         }
475                               }
476                     }
477                     dev_warn(p->dev, "%s:%d cb[%d] bo too small (layer size %d, "
478                                "offset %d, max layer %d, bo size %ld, slice %d)\n",
479                                __func__, __LINE__, id, surf.layer_size,
480                               track->cb_color_bo_offset[id] << 8, mslice,
481                               radeon_bo_size(track->cb_color_bo[id]), slice);
482                     dev_warn(p->dev, "%s:%d problematic surf: (%d %d) (%d %d %d %d %d %d %d)\n",
483                                __func__, __LINE__, surf.nbx, surf.nby,
484                               surf.mode, surf.bpe, surf.nsamples,
485                               surf.bankw, surf.bankh,
486                               surf.tsplit, surf.mtilea);
487                     return -EINVAL;
488           }
489 old_ddx_ok:
490 
491           return 0;
492 }
493 
evergreen_cs_track_validate_htile(struct radeon_cs_parser * p,unsigned nbx,unsigned nby)494 static int evergreen_cs_track_validate_htile(struct radeon_cs_parser *p,
495                                                             unsigned nbx, unsigned nby)
496 {
497           struct evergreen_cs_track *track = p->track;
498           unsigned long size;
499 
500           if (track->htile_bo == NULL) {
501                     dev_warn(p->dev, "%s:%d htile enabled without htile surface 0x%08x\n",
502                                         __func__, __LINE__, track->db_z_info);
503                     return -EINVAL;
504           }
505 
506           if (G_028ABC_LINEAR(track->htile_surface)) {
507                     /* pitch must be 16 htiles aligned == 16 * 8 pixel aligned */
508                     nbx = round_up(nbx, 16 * 8);
509                     /* height is npipes htiles aligned == npipes * 8 pixel aligned */
510                     nby = round_up(nby, track->npipes * 8);
511           } else {
512                     /* always assume 8x8 htile */
513                     /* align is htile align * 8, htile align vary according to
514                      * number of pipe and tile width and nby
515                      */
516                     switch (track->npipes) {
517                     case 8:
518                               /* HTILE_WIDTH = 8 & HTILE_HEIGHT = 8*/
519                               nbx = round_up(nbx, 64 * 8);
520                               nby = round_up(nby, 64 * 8);
521                               break;
522                     case 4:
523                               /* HTILE_WIDTH = 8 & HTILE_HEIGHT = 8*/
524                               nbx = round_up(nbx, 64 * 8);
525                               nby = round_up(nby, 32 * 8);
526                               break;
527                     case 2:
528                               /* HTILE_WIDTH = 8 & HTILE_HEIGHT = 8*/
529                               nbx = round_up(nbx, 32 * 8);
530                               nby = round_up(nby, 32 * 8);
531                               break;
532                     case 1:
533                               /* HTILE_WIDTH = 8 & HTILE_HEIGHT = 8*/
534                               nbx = round_up(nbx, 32 * 8);
535                               nby = round_up(nby, 16 * 8);
536                               break;
537                     default:
538                               dev_warn(p->dev, "%s:%d invalid num pipes %d\n",
539                                                   __func__, __LINE__, track->npipes);
540                               return -EINVAL;
541                     }
542           }
543           /* compute number of htile */
544           nbx = nbx >> 3;
545           nby = nby >> 3;
546           /* size must be aligned on npipes * 2K boundary */
547           size = roundup(nbx * nby * 4, track->npipes * (2 << 10));
548           size += track->htile_offset;
549 
550           if (size > radeon_bo_size(track->htile_bo)) {
551                     dev_warn(p->dev, "%s:%d htile surface too small %ld for %ld (%d %d)\n",
552                                         __func__, __LINE__, radeon_bo_size(track->htile_bo),
553                                         size, nbx, nby);
554                     return -EINVAL;
555           }
556           return 0;
557 }
558 
evergreen_cs_track_validate_stencil(struct radeon_cs_parser * p)559 static int evergreen_cs_track_validate_stencil(struct radeon_cs_parser *p)
560 {
561           struct evergreen_cs_track *track = p->track;
562           struct eg_surface surf;
563           unsigned pitch, slice, mslice;
564           unsigned long offset;
565           int r;
566 
567           mslice = G_028008_SLICE_MAX(track->db_depth_view) + 1;
568           pitch = G_028058_PITCH_TILE_MAX(track->db_depth_size);
569           slice = track->db_depth_slice;
570           surf.nbx = (pitch + 1) * 8;
571           surf.nby = ((slice + 1) * 64) / surf.nbx;
572           surf.mode = G_028040_ARRAY_MODE(track->db_z_info);
573           surf.format = G_028044_FORMAT(track->db_s_info);
574           surf.tsplit = G_028044_TILE_SPLIT(track->db_s_info);
575           surf.nbanks = G_028040_NUM_BANKS(track->db_z_info);
576           surf.bankw = G_028040_BANK_WIDTH(track->db_z_info);
577           surf.bankh = G_028040_BANK_HEIGHT(track->db_z_info);
578           surf.mtilea = G_028040_MACRO_TILE_ASPECT(track->db_z_info);
579           surf.nsamples = 1;
580 
581           if (surf.format != 1) {
582                     dev_warn(p->dev, "%s:%d stencil invalid format %d\n",
583                                __func__, __LINE__, surf.format);
584                     return -EINVAL;
585           }
586           /* replace by color format so we can use same code */
587           surf.format = V_028C70_COLOR_8;
588 
589           r = evergreen_surface_value_conv_check(p, &surf, "stencil");
590           if (r) {
591                     return r;
592           }
593 
594           r = evergreen_surface_check(p, &surf, NULL);
595           if (r) {
596                     /* old userspace doesn't compute proper depth/stencil alignment
597                      * check that alignment against a bigger byte per elements and
598                      * only report if that alignment is wrong too.
599                      */
600                     surf.format = V_028C70_COLOR_8_8_8_8;
601                     r = evergreen_surface_check(p, &surf, "stencil");
602                     if (r) {
603                               dev_warn(p->dev, "%s:%d stencil invalid (0x%08x 0x%08x 0x%08x 0x%08x)\n",
604                                          __func__, __LINE__, track->db_depth_size,
605                                          track->db_depth_slice, track->db_s_info, track->db_z_info);
606                     }
607                     return r;
608           }
609 
610           offset = track->db_s_read_offset << 8;
611           if (offset & (surf.base_align - 1)) {
612                     dev_warn(p->dev, "%s:%d stencil read bo base %ld not aligned with %ld\n",
613                                __func__, __LINE__, offset, surf.base_align);
614                     return -EINVAL;
615           }
616           offset += surf.layer_size * mslice;
617           if (offset > radeon_bo_size(track->db_s_read_bo)) {
618                     dev_warn(p->dev, "%s:%d stencil read bo too small (layer size %d, "
619                                "offset %ld, max layer %d, bo size %ld)\n",
620                                __func__, __LINE__, surf.layer_size,
621                               (unsigned long)track->db_s_read_offset << 8, mslice,
622                               radeon_bo_size(track->db_s_read_bo));
623                     dev_warn(p->dev, "%s:%d stencil invalid (0x%08x 0x%08x 0x%08x 0x%08x)\n",
624                                __func__, __LINE__, track->db_depth_size,
625                                track->db_depth_slice, track->db_s_info, track->db_z_info);
626                     return -EINVAL;
627           }
628 
629           offset = track->db_s_write_offset << 8;
630           if (offset & (surf.base_align - 1)) {
631                     dev_warn(p->dev, "%s:%d stencil write bo base %ld not aligned with %ld\n",
632                                __func__, __LINE__, offset, surf.base_align);
633                     return -EINVAL;
634           }
635           offset += surf.layer_size * mslice;
636           if (offset > radeon_bo_size(track->db_s_write_bo)) {
637                     dev_warn(p->dev, "%s:%d stencil write bo too small (layer size %d, "
638                                "offset %ld, max layer %d, bo size %ld)\n",
639                                __func__, __LINE__, surf.layer_size,
640                               (unsigned long)track->db_s_write_offset << 8, mslice,
641                               radeon_bo_size(track->db_s_write_bo));
642                     return -EINVAL;
643           }
644 
645           /* hyperz */
646           if (G_028040_TILE_SURFACE_ENABLE(track->db_z_info)) {
647                     r = evergreen_cs_track_validate_htile(p, surf.nbx, surf.nby);
648                     if (r) {
649                               return r;
650                     }
651           }
652 
653           return 0;
654 }
655 
evergreen_cs_track_validate_depth(struct radeon_cs_parser * p)656 static int evergreen_cs_track_validate_depth(struct radeon_cs_parser *p)
657 {
658           struct evergreen_cs_track *track = p->track;
659           struct eg_surface surf;
660           unsigned pitch, slice, mslice;
661           unsigned long offset;
662           int r;
663 
664           mslice = G_028008_SLICE_MAX(track->db_depth_view) + 1;
665           pitch = G_028058_PITCH_TILE_MAX(track->db_depth_size);
666           slice = track->db_depth_slice;
667           surf.nbx = (pitch + 1) * 8;
668           surf.nby = ((slice + 1) * 64) / surf.nbx;
669           surf.mode = G_028040_ARRAY_MODE(track->db_z_info);
670           surf.format = G_028040_FORMAT(track->db_z_info);
671           surf.tsplit = G_028040_TILE_SPLIT(track->db_z_info);
672           surf.nbanks = G_028040_NUM_BANKS(track->db_z_info);
673           surf.bankw = G_028040_BANK_WIDTH(track->db_z_info);
674           surf.bankh = G_028040_BANK_HEIGHT(track->db_z_info);
675           surf.mtilea = G_028040_MACRO_TILE_ASPECT(track->db_z_info);
676           surf.nsamples = 1;
677 
678           switch (surf.format) {
679           case V_028040_Z_16:
680                     surf.format = V_028C70_COLOR_16;
681                     break;
682           case V_028040_Z_24:
683           case V_028040_Z_32_FLOAT:
684                     surf.format = V_028C70_COLOR_8_8_8_8;
685                     break;
686           default:
687                     dev_warn(p->dev, "%s:%d depth invalid format %d\n",
688                                __func__, __LINE__, surf.format);
689                     return -EINVAL;
690           }
691 
692           r = evergreen_surface_value_conv_check(p, &surf, "depth");
693           if (r) {
694                     dev_warn(p->dev, "%s:%d depth invalid (0x%08x 0x%08x 0x%08x)\n",
695                                __func__, __LINE__, track->db_depth_size,
696                                track->db_depth_slice, track->db_z_info);
697                     return r;
698           }
699 
700           r = evergreen_surface_check(p, &surf, "depth");
701           if (r) {
702                     dev_warn(p->dev, "%s:%d depth invalid (0x%08x 0x%08x 0x%08x)\n",
703                                __func__, __LINE__, track->db_depth_size,
704                                track->db_depth_slice, track->db_z_info);
705                     return r;
706           }
707 
708           offset = track->db_z_read_offset << 8;
709           if (offset & (surf.base_align - 1)) {
710                     dev_warn(p->dev, "%s:%d stencil read bo base %ld not aligned with %ld\n",
711                                __func__, __LINE__, offset, surf.base_align);
712                     return -EINVAL;
713           }
714           offset += surf.layer_size * mslice;
715           if (offset > radeon_bo_size(track->db_z_read_bo)) {
716                     dev_warn(p->dev, "%s:%d depth read bo too small (layer size %d, "
717                                "offset %ld, max layer %d, bo size %ld)\n",
718                                __func__, __LINE__, surf.layer_size,
719                               (unsigned long)track->db_z_read_offset << 8, mslice,
720                               radeon_bo_size(track->db_z_read_bo));
721                     return -EINVAL;
722           }
723 
724           offset = track->db_z_write_offset << 8;
725           if (offset & (surf.base_align - 1)) {
726                     dev_warn(p->dev, "%s:%d stencil write bo base %ld not aligned with %ld\n",
727                                __func__, __LINE__, offset, surf.base_align);
728                     return -EINVAL;
729           }
730           offset += surf.layer_size * mslice;
731           if (offset > radeon_bo_size(track->db_z_write_bo)) {
732                     dev_warn(p->dev, "%s:%d depth write bo too small (layer size %d, "
733                                "offset %ld, max layer %d, bo size %ld)\n",
734                                __func__, __LINE__, surf.layer_size,
735                               (unsigned long)track->db_z_write_offset << 8, mslice,
736                               radeon_bo_size(track->db_z_write_bo));
737                     return -EINVAL;
738           }
739 
740           /* hyperz */
741           if (G_028040_TILE_SURFACE_ENABLE(track->db_z_info)) {
742                     r = evergreen_cs_track_validate_htile(p, surf.nbx, surf.nby);
743                     if (r) {
744                               return r;
745                     }
746           }
747 
748           return 0;
749 }
750 
evergreen_cs_track_validate_texture(struct radeon_cs_parser * p,struct radeon_bo * texture,struct radeon_bo * mipmap,unsigned idx)751 static int evergreen_cs_track_validate_texture(struct radeon_cs_parser *p,
752                                                          struct radeon_bo *texture,
753                                                          struct radeon_bo *mipmap,
754                                                          unsigned idx)
755 {
756           struct eg_surface surf;
757           unsigned long toffset, moffset;
758           unsigned dim, llevel, mslice, width, height, depth, i;
759           u32 texdw[8];
760           int r;
761 
762           texdw[0] = radeon_get_ib_value(p, idx + 0);
763           texdw[1] = radeon_get_ib_value(p, idx + 1);
764           texdw[2] = radeon_get_ib_value(p, idx + 2);
765           texdw[3] = radeon_get_ib_value(p, idx + 3);
766           texdw[4] = radeon_get_ib_value(p, idx + 4);
767           texdw[5] = radeon_get_ib_value(p, idx + 5);
768           texdw[6] = radeon_get_ib_value(p, idx + 6);
769           texdw[7] = radeon_get_ib_value(p, idx + 7);
770           dim = G_030000_DIM(texdw[0]);
771           llevel = G_030014_LAST_LEVEL(texdw[5]);
772           mslice = G_030014_LAST_ARRAY(texdw[5]) + 1;
773           width = G_030000_TEX_WIDTH(texdw[0]) + 1;
774           height =  G_030004_TEX_HEIGHT(texdw[1]) + 1;
775           depth = G_030004_TEX_DEPTH(texdw[1]) + 1;
776           surf.format = G_03001C_DATA_FORMAT(texdw[7]);
777           surf.nbx = (G_030000_PITCH(texdw[0]) + 1) * 8;
778           surf.nbx = r600_fmt_get_nblocksx(surf.format, surf.nbx);
779           surf.nby = r600_fmt_get_nblocksy(surf.format, height);
780           surf.mode = G_030004_ARRAY_MODE(texdw[1]);
781           surf.tsplit = G_030018_TILE_SPLIT(texdw[6]);
782           surf.nbanks = G_03001C_NUM_BANKS(texdw[7]);
783           surf.bankw = G_03001C_BANK_WIDTH(texdw[7]);
784           surf.bankh = G_03001C_BANK_HEIGHT(texdw[7]);
785           surf.mtilea = G_03001C_MACRO_TILE_ASPECT(texdw[7]);
786           surf.nsamples = 1;
787           toffset = texdw[2] << 8;
788           moffset = texdw[3] << 8;
789 
790           if (!r600_fmt_is_valid_texture(surf.format, p->family)) {
791                     dev_warn(p->dev, "%s:%d texture invalid format %d\n",
792                                __func__, __LINE__, surf.format);
793                     return -EINVAL;
794           }
795           switch (dim) {
796           case V_030000_SQ_TEX_DIM_1D:
797           case V_030000_SQ_TEX_DIM_2D:
798           case V_030000_SQ_TEX_DIM_CUBEMAP:
799           case V_030000_SQ_TEX_DIM_1D_ARRAY:
800           case V_030000_SQ_TEX_DIM_2D_ARRAY:
801                     depth = 1;
802                     break;
803           case V_030000_SQ_TEX_DIM_2D_MSAA:
804           case V_030000_SQ_TEX_DIM_2D_ARRAY_MSAA:
805                     surf.nsamples = 1 << llevel;
806                     llevel = 0;
807                     depth = 1;
808                     break;
809           case V_030000_SQ_TEX_DIM_3D:
810                     break;
811           default:
812                     dev_warn(p->dev, "%s:%d texture invalid dimension %d\n",
813                                __func__, __LINE__, dim);
814                     return -EINVAL;
815           }
816 
817           r = evergreen_surface_value_conv_check(p, &surf, "texture");
818           if (r) {
819                     return r;
820           }
821 
822           /* align height */
823           evergreen_surface_check(p, &surf, NULL);
824           surf.nby = ALIGN(surf.nby, surf.halign);
825 
826           r = evergreen_surface_check(p, &surf, "texture");
827           if (r) {
828                     dev_warn(p->dev, "%s:%d texture invalid 0x%08x 0x%08x 0x%08x 0x%08x 0x%08x 0x%08x\n",
829                                __func__, __LINE__, texdw[0], texdw[1], texdw[4],
830                                texdw[5], texdw[6], texdw[7]);
831                     return r;
832           }
833 
834           /* check texture size */
835           if (toffset & (surf.base_align - 1)) {
836                     dev_warn(p->dev, "%s:%d texture bo base %ld not aligned with %ld\n",
837                                __func__, __LINE__, toffset, surf.base_align);
838                     return -EINVAL;
839           }
840           if (surf.nsamples <= 1 && moffset & (surf.base_align - 1)) {
841                     dev_warn(p->dev, "%s:%d mipmap bo base %ld not aligned with %ld\n",
842                                __func__, __LINE__, moffset, surf.base_align);
843                     return -EINVAL;
844           }
845           if (dim == SQ_TEX_DIM_3D) {
846                     toffset += surf.layer_size * depth;
847           } else {
848                     toffset += surf.layer_size * mslice;
849           }
850           if (toffset > radeon_bo_size(texture)) {
851                     dev_warn(p->dev, "%s:%d texture bo too small (layer size %d, "
852                                "offset %ld, max layer %d, depth %d, bo size %ld) (%d %d)\n",
853                                __func__, __LINE__, surf.layer_size,
854                               (unsigned long)texdw[2] << 8, mslice,
855                               depth, radeon_bo_size(texture),
856                               surf.nbx, surf.nby);
857                     return -EINVAL;
858           }
859 
860           if (!mipmap) {
861                     if (llevel) {
862                               dev_warn(p->dev, "%s:%i got NULL MIP_ADDRESS relocation\n",
863                                          __func__, __LINE__);
864                               return -EINVAL;
865                     } else {
866                               return 0; /* everything's ok */
867                     }
868           }
869 
870           /* check mipmap size */
871           for (i = 1; i <= llevel; i++) {
872                     unsigned w, h, d;
873 
874                     w = r600_mip_minify(width, i);
875                     h = r600_mip_minify(height, i);
876                     d = r600_mip_minify(depth, i);
877                     surf.nbx = r600_fmt_get_nblocksx(surf.format, w);
878                     surf.nby = r600_fmt_get_nblocksy(surf.format, h);
879 
880                     switch (surf.mode) {
881                     case ARRAY_2D_TILED_THIN1:
882                               if (surf.nbx < surf.palign || surf.nby < surf.halign) {
883                                         surf.mode = ARRAY_1D_TILED_THIN1;
884                               }
885                               /* recompute alignment */
886                               evergreen_surface_check(p, &surf, NULL);
887                               break;
888                     case ARRAY_LINEAR_GENERAL:
889                     case ARRAY_LINEAR_ALIGNED:
890                     case ARRAY_1D_TILED_THIN1:
891                               break;
892                     default:
893                               dev_warn(p->dev, "%s:%d invalid array mode %d\n",
894                                          __func__, __LINE__, surf.mode);
895                               return -EINVAL;
896                     }
897                     surf.nbx = ALIGN(surf.nbx, surf.palign);
898                     surf.nby = ALIGN(surf.nby, surf.halign);
899 
900                     r = evergreen_surface_check(p, &surf, "mipmap");
901                     if (r) {
902                               return r;
903                     }
904 
905                     if (dim == SQ_TEX_DIM_3D) {
906                               moffset += surf.layer_size * d;
907                     } else {
908                               moffset += surf.layer_size * mslice;
909                     }
910                     if (moffset > radeon_bo_size(mipmap)) {
911                               dev_warn(p->dev, "%s:%d mipmap [%d] bo too small (layer size %d, "
912                                                   "offset %ld, coffset %ld, max layer %d, depth %d, "
913                                                   "bo size %ld) level0 (%d %d %d)\n",
914                                                   __func__, __LINE__, i, surf.layer_size,
915                                                   (unsigned long)texdw[3] << 8, moffset, mslice,
916                                                   d, radeon_bo_size(mipmap),
917                                                   width, height, depth);
918                               dev_warn(p->dev, "%s:%d problematic surf: (%d %d) (%d %d %d %d %d %d %d)\n",
919                                          __func__, __LINE__, surf.nbx, surf.nby,
920                                         surf.mode, surf.bpe, surf.nsamples,
921                                         surf.bankw, surf.bankh,
922                                         surf.tsplit, surf.mtilea);
923                               return -EINVAL;
924                     }
925           }
926 
927           return 0;
928 }
929 
evergreen_cs_track_check(struct radeon_cs_parser * p)930 static int evergreen_cs_track_check(struct radeon_cs_parser *p)
931 {
932           struct evergreen_cs_track *track = p->track;
933           unsigned tmp, i;
934           int r;
935           unsigned buffer_mask = 0;
936 
937           /* check streamout */
938           if (track->streamout_dirty && track->vgt_strmout_config) {
939                     for (i = 0; i < 4; i++) {
940                               if (track->vgt_strmout_config & (1 << i)) {
941                                         buffer_mask |= (track->vgt_strmout_buffer_config >> (i * 4)) & 0xf;
942                               }
943                     }
944 
945                     for (i = 0; i < 4; i++) {
946                               if (buffer_mask & (1 << i)) {
947                                         if (track->vgt_strmout_bo[i]) {
948                                                   u64 offset = (u64)track->vgt_strmout_bo_offset[i] +
949                                                                       (u64)track->vgt_strmout_size[i];
950                                                   if (offset > radeon_bo_size(track->vgt_strmout_bo[i])) {
951                                                             DRM_ERROR("streamout %d bo too small: 0x%llx, 0x%lx\n",
952                                                                         i, offset,
953                                                                         radeon_bo_size(track->vgt_strmout_bo[i]));
954                                                             return -EINVAL;
955                                                   }
956                                         } else {
957                                                   dev_warn(p->dev, "No buffer for streamout %d\n", i);
958                                                   return -EINVAL;
959                                         }
960                               }
961                     }
962                     track->streamout_dirty = false;
963           }
964 
965           if (track->sx_misc_kill_all_prims)
966                     return 0;
967 
968           /* check that we have a cb for each enabled target
969            */
970           if (track->cb_dirty) {
971                     tmp = track->cb_target_mask;
972                     for (i = 0; i < 8; i++) {
973                               u32 format = G_028C70_FORMAT(track->cb_color_info[i]);
974 
975                               if (format != V_028C70_COLOR_INVALID &&
976                                   (tmp >> (i * 4)) & 0xF) {
977                                         /* at least one component is enabled */
978                                         if (track->cb_color_bo[i] == NULL) {
979                                                   dev_warn(p->dev, "%s:%d mask 0x%08X | 0x%08X no cb for %d\n",
980                                                             __func__, __LINE__, track->cb_target_mask, track->cb_shader_mask, i);
981                                                   return -EINVAL;
982                                         }
983                                         /* check cb */
984                                         r = evergreen_cs_track_validate_cb(p, i);
985                                         if (r) {
986                                                   return r;
987                                         }
988                               }
989                     }
990                     track->cb_dirty = false;
991           }
992 
993           if (track->db_dirty) {
994                     /* Check stencil buffer */
995                     if (G_028044_FORMAT(track->db_s_info) != V_028044_STENCIL_INVALID &&
996                         G_028800_STENCIL_ENABLE(track->db_depth_control)) {
997                               r = evergreen_cs_track_validate_stencil(p);
998                               if (r)
999                                         return r;
1000                     }
1001                     /* Check depth buffer */
1002                     if (G_028040_FORMAT(track->db_z_info) != V_028040_Z_INVALID &&
1003                         G_028800_Z_ENABLE(track->db_depth_control)) {
1004                               r = evergreen_cs_track_validate_depth(p);
1005                               if (r)
1006                                         return r;
1007                     }
1008                     track->db_dirty = false;
1009           }
1010 
1011           return 0;
1012 }
1013 
1014 /**
1015  * evergreen_cs_packet_parse_vline() - parse userspace VLINE packet
1016  * @parser:                   parser structure holding parsing context.
1017  *
1018  * This is an Evergreen(+)-specific function for parsing VLINE packets.
1019  * Real work is done by r600_cs_common_vline_parse function.
1020  * Here we just set up ASIC-specific register table and call
1021  * the common implementation function.
1022  */
evergreen_cs_packet_parse_vline(struct radeon_cs_parser * p)1023 static int evergreen_cs_packet_parse_vline(struct radeon_cs_parser *p)
1024 {
1025 
1026           static uint32_t vline_start_end[6] = {
1027                     EVERGREEN_VLINE_START_END + EVERGREEN_CRTC0_REGISTER_OFFSET,
1028                     EVERGREEN_VLINE_START_END + EVERGREEN_CRTC1_REGISTER_OFFSET,
1029                     EVERGREEN_VLINE_START_END + EVERGREEN_CRTC2_REGISTER_OFFSET,
1030                     EVERGREEN_VLINE_START_END + EVERGREEN_CRTC3_REGISTER_OFFSET,
1031                     EVERGREEN_VLINE_START_END + EVERGREEN_CRTC4_REGISTER_OFFSET,
1032                     EVERGREEN_VLINE_START_END + EVERGREEN_CRTC5_REGISTER_OFFSET
1033           };
1034           static uint32_t vline_status[6] = {
1035                     EVERGREEN_VLINE_STATUS + EVERGREEN_CRTC0_REGISTER_OFFSET,
1036                     EVERGREEN_VLINE_STATUS + EVERGREEN_CRTC1_REGISTER_OFFSET,
1037                     EVERGREEN_VLINE_STATUS + EVERGREEN_CRTC2_REGISTER_OFFSET,
1038                     EVERGREEN_VLINE_STATUS + EVERGREEN_CRTC3_REGISTER_OFFSET,
1039                     EVERGREEN_VLINE_STATUS + EVERGREEN_CRTC4_REGISTER_OFFSET,
1040                     EVERGREEN_VLINE_STATUS + EVERGREEN_CRTC5_REGISTER_OFFSET
1041           };
1042 
1043           return r600_cs_common_vline_parse(p, vline_start_end, vline_status);
1044 }
1045 
evergreen_packet0_check(struct radeon_cs_parser * p,struct radeon_cs_packet * pkt,unsigned idx,unsigned reg)1046 static int evergreen_packet0_check(struct radeon_cs_parser *p,
1047                                            struct radeon_cs_packet *pkt,
1048                                            unsigned idx, unsigned reg)
1049 {
1050           int r;
1051 
1052           switch (reg) {
1053           case EVERGREEN_VLINE_START_END:
1054                     r = evergreen_cs_packet_parse_vline(p);
1055                     if (r) {
1056                               DRM_ERROR("No reloc for ib[%d]=0x%04X\n",
1057                                                   idx, reg);
1058                               return r;
1059                     }
1060                     break;
1061           default:
1062                     pr_err("Forbidden register 0x%04X in cs at %d\n", reg, idx);
1063                     return -EINVAL;
1064           }
1065           return 0;
1066 }
1067 
evergreen_cs_parse_packet0(struct radeon_cs_parser * p,struct radeon_cs_packet * pkt)1068 static int evergreen_cs_parse_packet0(struct radeon_cs_parser *p,
1069                                               struct radeon_cs_packet *pkt)
1070 {
1071           unsigned reg, i;
1072           unsigned idx;
1073           int r;
1074 
1075           idx = pkt->idx + 1;
1076           reg = pkt->reg;
1077           for (i = 0; i <= pkt->count; i++, idx++, reg += 4) {
1078                     r = evergreen_packet0_check(p, pkt, idx, reg);
1079                     if (r) {
1080                               return r;
1081                     }
1082           }
1083           return 0;
1084 }
1085 
1086 /**
1087  * evergreen_cs_handle_reg() - process registers that need special handling.
1088  * @p: parser structure holding parsing context
1089  * @reg: register we are testing
1090  * @idx: index into the cs buffer
1091  */
evergreen_cs_handle_reg(struct radeon_cs_parser * p,u32 reg,u32 idx)1092 static int evergreen_cs_handle_reg(struct radeon_cs_parser *p, u32 reg, u32 idx)
1093 {
1094           struct evergreen_cs_track *track = (struct evergreen_cs_track *)p->track;
1095           struct radeon_bo_list *reloc;
1096           u32 tmp, *ib;
1097           int r;
1098 
1099           ib = p->ib.ptr;
1100           switch (reg) {
1101           /* force following reg to 0 in an attempt to disable out buffer
1102            * which will need us to better understand how it works to perform
1103            * security check on it (Jerome)
1104            */
1105           case SQ_ESGS_RING_SIZE:
1106           case SQ_GSVS_RING_SIZE:
1107           case SQ_ESTMP_RING_SIZE:
1108           case SQ_GSTMP_RING_SIZE:
1109           case SQ_HSTMP_RING_SIZE:
1110           case SQ_LSTMP_RING_SIZE:
1111           case SQ_PSTMP_RING_SIZE:
1112           case SQ_VSTMP_RING_SIZE:
1113           case SQ_ESGS_RING_ITEMSIZE:
1114           case SQ_ESTMP_RING_ITEMSIZE:
1115           case SQ_GSTMP_RING_ITEMSIZE:
1116           case SQ_GSVS_RING_ITEMSIZE:
1117           case SQ_GS_VERT_ITEMSIZE:
1118           case SQ_GS_VERT_ITEMSIZE_1:
1119           case SQ_GS_VERT_ITEMSIZE_2:
1120           case SQ_GS_VERT_ITEMSIZE_3:
1121           case SQ_GSVS_RING_OFFSET_1:
1122           case SQ_GSVS_RING_OFFSET_2:
1123           case SQ_GSVS_RING_OFFSET_3:
1124           case SQ_HSTMP_RING_ITEMSIZE:
1125           case SQ_LSTMP_RING_ITEMSIZE:
1126           case SQ_PSTMP_RING_ITEMSIZE:
1127           case SQ_VSTMP_RING_ITEMSIZE:
1128           case VGT_TF_RING_SIZE:
1129                     /* get value to populate the IB don't remove */
1130                     /*tmp =radeon_get_ib_value(p, idx);
1131                       ib[idx] = 0;*/
1132                     break;
1133           case SQ_ESGS_RING_BASE:
1134           case SQ_GSVS_RING_BASE:
1135           case SQ_ESTMP_RING_BASE:
1136           case SQ_GSTMP_RING_BASE:
1137           case SQ_HSTMP_RING_BASE:
1138           case SQ_LSTMP_RING_BASE:
1139           case SQ_PSTMP_RING_BASE:
1140           case SQ_VSTMP_RING_BASE:
1141                     r = radeon_cs_packet_next_reloc(p, &reloc, 0);
1142                     if (r) {
1143                               dev_warn(p->dev, "bad SET_CONTEXT_REG "
1144                                                   "0x%04X\n", reg);
1145                               return -EINVAL;
1146                     }
1147                     ib[idx] += (u32)((reloc->gpu_offset >> 8) & 0xffffffff);
1148                     break;
1149           case DB_DEPTH_CONTROL:
1150                     track->db_depth_control = radeon_get_ib_value(p, idx);
1151                     track->db_dirty = true;
1152                     break;
1153           case CAYMAN_DB_EQAA:
1154                     if (p->rdev->family < CHIP_CAYMAN) {
1155                               dev_warn(p->dev, "bad SET_CONTEXT_REG "
1156                                          "0x%04X\n", reg);
1157                               return -EINVAL;
1158                     }
1159                     break;
1160           case CAYMAN_DB_DEPTH_INFO:
1161                     if (p->rdev->family < CHIP_CAYMAN) {
1162                               dev_warn(p->dev, "bad SET_CONTEXT_REG "
1163                                          "0x%04X\n", reg);
1164                               return -EINVAL;
1165                     }
1166                     break;
1167           case DB_Z_INFO:
1168                     track->db_z_info = radeon_get_ib_value(p, idx);
1169                     if (!(p->cs_flags & RADEON_CS_KEEP_TILING_FLAGS)) {
1170                               r = radeon_cs_packet_next_reloc(p, &reloc, 0);
1171                               if (r) {
1172                                         dev_warn(p->dev, "bad SET_CONTEXT_REG "
1173                                                             "0x%04X\n", reg);
1174                                         return -EINVAL;
1175                               }
1176                               ib[idx] &= ~Z_ARRAY_MODE(0xf);
1177                               track->db_z_info &= ~Z_ARRAY_MODE(0xf);
1178                               ib[idx] |= Z_ARRAY_MODE(evergreen_cs_get_aray_mode(reloc->tiling_flags));
1179                               track->db_z_info |= Z_ARRAY_MODE(evergreen_cs_get_aray_mode(reloc->tiling_flags));
1180                               if (reloc->tiling_flags & RADEON_TILING_MACRO) {
1181                                         unsigned bankw, bankh, mtaspect, tile_split;
1182 
1183                                         evergreen_tiling_fields(reloc->tiling_flags,
1184                                                                       &bankw, &bankh, &mtaspect,
1185                                                                       &tile_split);
1186                                         ib[idx] |= DB_NUM_BANKS(evergreen_cs_get_num_banks(track->nbanks));
1187                                         ib[idx] |= DB_TILE_SPLIT(tile_split) |
1188                                                             DB_BANK_WIDTH(bankw) |
1189                                                             DB_BANK_HEIGHT(bankh) |
1190                                                             DB_MACRO_TILE_ASPECT(mtaspect);
1191                               }
1192                     }
1193                     track->db_dirty = true;
1194                     break;
1195           case DB_STENCIL_INFO:
1196                     track->db_s_info = radeon_get_ib_value(p, idx);
1197                     track->db_dirty = true;
1198                     break;
1199           case DB_DEPTH_VIEW:
1200                     track->db_depth_view = radeon_get_ib_value(p, idx);
1201                     track->db_dirty = true;
1202                     break;
1203           case DB_DEPTH_SIZE:
1204                     track->db_depth_size = radeon_get_ib_value(p, idx);
1205                     track->db_dirty = true;
1206                     break;
1207           case R_02805C_DB_DEPTH_SLICE:
1208                     track->db_depth_slice = radeon_get_ib_value(p, idx);
1209                     track->db_dirty = true;
1210                     break;
1211           case DB_Z_READ_BASE:
1212                     r = radeon_cs_packet_next_reloc(p, &reloc, 0);
1213                     if (r) {
1214                               dev_warn(p->dev, "bad SET_CONTEXT_REG "
1215                                                   "0x%04X\n", reg);
1216                               return -EINVAL;
1217                     }
1218                     track->db_z_read_offset = radeon_get_ib_value(p, idx);
1219                     ib[idx] += (u32)((reloc->gpu_offset >> 8) & 0xffffffff);
1220                     track->db_z_read_bo = reloc->robj;
1221                     track->db_dirty = true;
1222                     break;
1223           case DB_Z_WRITE_BASE:
1224                     r = radeon_cs_packet_next_reloc(p, &reloc, 0);
1225                     if (r) {
1226                               dev_warn(p->dev, "bad SET_CONTEXT_REG "
1227                                                   "0x%04X\n", reg);
1228                               return -EINVAL;
1229                     }
1230                     track->db_z_write_offset = radeon_get_ib_value(p, idx);
1231                     ib[idx] += (u32)((reloc->gpu_offset >> 8) & 0xffffffff);
1232                     track->db_z_write_bo = reloc->robj;
1233                     track->db_dirty = true;
1234                     break;
1235           case DB_STENCIL_READ_BASE:
1236                     r = radeon_cs_packet_next_reloc(p, &reloc, 0);
1237                     if (r) {
1238                               dev_warn(p->dev, "bad SET_CONTEXT_REG "
1239                                                   "0x%04X\n", reg);
1240                               return -EINVAL;
1241                     }
1242                     track->db_s_read_offset = radeon_get_ib_value(p, idx);
1243                     ib[idx] += (u32)((reloc->gpu_offset >> 8) & 0xffffffff);
1244                     track->db_s_read_bo = reloc->robj;
1245                     track->db_dirty = true;
1246                     break;
1247           case DB_STENCIL_WRITE_BASE:
1248                     r = radeon_cs_packet_next_reloc(p, &reloc, 0);
1249                     if (r) {
1250                               dev_warn(p->dev, "bad SET_CONTEXT_REG "
1251                                                   "0x%04X\n", reg);
1252                               return -EINVAL;
1253                     }
1254                     track->db_s_write_offset = radeon_get_ib_value(p, idx);
1255                     ib[idx] += (u32)((reloc->gpu_offset >> 8) & 0xffffffff);
1256                     track->db_s_write_bo = reloc->robj;
1257                     track->db_dirty = true;
1258                     break;
1259           case VGT_STRMOUT_CONFIG:
1260                     track->vgt_strmout_config = radeon_get_ib_value(p, idx);
1261                     track->streamout_dirty = true;
1262                     break;
1263           case VGT_STRMOUT_BUFFER_CONFIG:
1264                     track->vgt_strmout_buffer_config = radeon_get_ib_value(p, idx);
1265                     track->streamout_dirty = true;
1266                     break;
1267           case VGT_STRMOUT_BUFFER_BASE_0:
1268           case VGT_STRMOUT_BUFFER_BASE_1:
1269           case VGT_STRMOUT_BUFFER_BASE_2:
1270           case VGT_STRMOUT_BUFFER_BASE_3:
1271                     r = radeon_cs_packet_next_reloc(p, &reloc, 0);
1272                     if (r) {
1273                               dev_warn(p->dev, "bad SET_CONTEXT_REG "
1274                                                   "0x%04X\n", reg);
1275                               return -EINVAL;
1276                     }
1277                     tmp = (reg - VGT_STRMOUT_BUFFER_BASE_0) / 16;
1278                     track->vgt_strmout_bo_offset[tmp] = radeon_get_ib_value(p, idx) << 8;
1279                     ib[idx] += (u32)((reloc->gpu_offset >> 8) & 0xffffffff);
1280                     track->vgt_strmout_bo[tmp] = reloc->robj;
1281                     track->streamout_dirty = true;
1282                     break;
1283           case VGT_STRMOUT_BUFFER_SIZE_0:
1284           case VGT_STRMOUT_BUFFER_SIZE_1:
1285           case VGT_STRMOUT_BUFFER_SIZE_2:
1286           case VGT_STRMOUT_BUFFER_SIZE_3:
1287                     tmp = (reg - VGT_STRMOUT_BUFFER_SIZE_0) / 16;
1288                     /* size in register is DWs, convert to bytes */
1289                     track->vgt_strmout_size[tmp] = radeon_get_ib_value(p, idx) * 4;
1290                     track->streamout_dirty = true;
1291                     break;
1292           case CP_COHER_BASE:
1293                     r = radeon_cs_packet_next_reloc(p, &reloc, 0);
1294                     if (r) {
1295                               dev_warn(p->dev, "missing reloc for CP_COHER_BASE "
1296                                                   "0x%04X\n", reg);
1297                               return -EINVAL;
1298                     }
1299                     ib[idx] += (u32)((reloc->gpu_offset >> 8) & 0xffffffff);
1300                     break;
1301           case CB_TARGET_MASK:
1302                     track->cb_target_mask = radeon_get_ib_value(p, idx);
1303                     track->cb_dirty = true;
1304                     break;
1305           case CB_SHADER_MASK:
1306                     track->cb_shader_mask = radeon_get_ib_value(p, idx);
1307                     track->cb_dirty = true;
1308                     break;
1309           case PA_SC_AA_CONFIG:
1310                     if (p->rdev->family >= CHIP_CAYMAN) {
1311                               dev_warn(p->dev, "bad SET_CONTEXT_REG "
1312                                          "0x%04X\n", reg);
1313                               return -EINVAL;
1314                     }
1315                     tmp = radeon_get_ib_value(p, idx) & MSAA_NUM_SAMPLES_MASK;
1316                     track->nsamples = 1 << tmp;
1317                     break;
1318           case CAYMAN_PA_SC_AA_CONFIG:
1319                     if (p->rdev->family < CHIP_CAYMAN) {
1320                               dev_warn(p->dev, "bad SET_CONTEXT_REG "
1321                                          "0x%04X\n", reg);
1322                               return -EINVAL;
1323                     }
1324                     tmp = radeon_get_ib_value(p, idx) & CAYMAN_MSAA_NUM_SAMPLES_MASK;
1325                     track->nsamples = 1 << tmp;
1326                     break;
1327           case CB_COLOR0_VIEW:
1328           case CB_COLOR1_VIEW:
1329           case CB_COLOR2_VIEW:
1330           case CB_COLOR3_VIEW:
1331           case CB_COLOR4_VIEW:
1332           case CB_COLOR5_VIEW:
1333           case CB_COLOR6_VIEW:
1334           case CB_COLOR7_VIEW:
1335                     tmp = (reg - CB_COLOR0_VIEW) / 0x3c;
1336                     track->cb_color_view[tmp] = radeon_get_ib_value(p, idx);
1337                     track->cb_dirty = true;
1338                     break;
1339           case CB_COLOR8_VIEW:
1340           case CB_COLOR9_VIEW:
1341           case CB_COLOR10_VIEW:
1342           case CB_COLOR11_VIEW:
1343                     tmp = ((reg - CB_COLOR8_VIEW) / 0x1c) + 8;
1344                     track->cb_color_view[tmp] = radeon_get_ib_value(p, idx);
1345                     track->cb_dirty = true;
1346                     break;
1347           case CB_COLOR0_INFO:
1348           case CB_COLOR1_INFO:
1349           case CB_COLOR2_INFO:
1350           case CB_COLOR3_INFO:
1351           case CB_COLOR4_INFO:
1352           case CB_COLOR5_INFO:
1353           case CB_COLOR6_INFO:
1354           case CB_COLOR7_INFO:
1355                     tmp = (reg - CB_COLOR0_INFO) / 0x3c;
1356                     track->cb_color_info[tmp] = radeon_get_ib_value(p, idx);
1357                     if (!(p->cs_flags & RADEON_CS_KEEP_TILING_FLAGS)) {
1358                               r = radeon_cs_packet_next_reloc(p, &reloc, 0);
1359                               if (r) {
1360                                         dev_warn(p->dev, "bad SET_CONTEXT_REG "
1361                                                             "0x%04X\n", reg);
1362                                         return -EINVAL;
1363                               }
1364                               ib[idx] |= CB_ARRAY_MODE(evergreen_cs_get_aray_mode(reloc->tiling_flags));
1365                               track->cb_color_info[tmp] |= CB_ARRAY_MODE(evergreen_cs_get_aray_mode(reloc->tiling_flags));
1366                     }
1367                     track->cb_dirty = true;
1368                     break;
1369           case CB_COLOR8_INFO:
1370           case CB_COLOR9_INFO:
1371           case CB_COLOR10_INFO:
1372           case CB_COLOR11_INFO:
1373                     tmp = ((reg - CB_COLOR8_INFO) / 0x1c) + 8;
1374                     track->cb_color_info[tmp] = radeon_get_ib_value(p, idx);
1375                     if (!(p->cs_flags & RADEON_CS_KEEP_TILING_FLAGS)) {
1376                               r = radeon_cs_packet_next_reloc(p, &reloc, 0);
1377                               if (r) {
1378                                         dev_warn(p->dev, "bad SET_CONTEXT_REG "
1379                                                             "0x%04X\n", reg);
1380                                         return -EINVAL;
1381                               }
1382                               ib[idx] |= CB_ARRAY_MODE(evergreen_cs_get_aray_mode(reloc->tiling_flags));
1383                               track->cb_color_info[tmp] |= CB_ARRAY_MODE(evergreen_cs_get_aray_mode(reloc->tiling_flags));
1384                     }
1385                     track->cb_dirty = true;
1386                     break;
1387           case CB_COLOR0_PITCH:
1388           case CB_COLOR1_PITCH:
1389           case CB_COLOR2_PITCH:
1390           case CB_COLOR3_PITCH:
1391           case CB_COLOR4_PITCH:
1392           case CB_COLOR5_PITCH:
1393           case CB_COLOR6_PITCH:
1394           case CB_COLOR7_PITCH:
1395                     tmp = (reg - CB_COLOR0_PITCH) / 0x3c;
1396                     track->cb_color_pitch[tmp] = radeon_get_ib_value(p, idx);
1397                     track->cb_dirty = true;
1398                     break;
1399           case CB_COLOR8_PITCH:
1400           case CB_COLOR9_PITCH:
1401           case CB_COLOR10_PITCH:
1402           case CB_COLOR11_PITCH:
1403                     tmp = ((reg - CB_COLOR8_PITCH) / 0x1c) + 8;
1404                     track->cb_color_pitch[tmp] = radeon_get_ib_value(p, idx);
1405                     track->cb_dirty = true;
1406                     break;
1407           case CB_COLOR0_SLICE:
1408           case CB_COLOR1_SLICE:
1409           case CB_COLOR2_SLICE:
1410           case CB_COLOR3_SLICE:
1411           case CB_COLOR4_SLICE:
1412           case CB_COLOR5_SLICE:
1413           case CB_COLOR6_SLICE:
1414           case CB_COLOR7_SLICE:
1415                     tmp = (reg - CB_COLOR0_SLICE) / 0x3c;
1416                     track->cb_color_slice[tmp] = radeon_get_ib_value(p, idx);
1417                     track->cb_color_slice_idx[tmp] = idx;
1418                     track->cb_dirty = true;
1419                     break;
1420           case CB_COLOR8_SLICE:
1421           case CB_COLOR9_SLICE:
1422           case CB_COLOR10_SLICE:
1423           case CB_COLOR11_SLICE:
1424                     tmp = ((reg - CB_COLOR8_SLICE) / 0x1c) + 8;
1425                     track->cb_color_slice[tmp] = radeon_get_ib_value(p, idx);
1426                     track->cb_color_slice_idx[tmp] = idx;
1427                     track->cb_dirty = true;
1428                     break;
1429           case CB_COLOR0_ATTRIB:
1430           case CB_COLOR1_ATTRIB:
1431           case CB_COLOR2_ATTRIB:
1432           case CB_COLOR3_ATTRIB:
1433           case CB_COLOR4_ATTRIB:
1434           case CB_COLOR5_ATTRIB:
1435           case CB_COLOR6_ATTRIB:
1436           case CB_COLOR7_ATTRIB:
1437                     r = radeon_cs_packet_next_reloc(p, &reloc, 0);
1438                     if (r) {
1439                               dev_warn(p->dev, "bad SET_CONTEXT_REG "
1440                                                   "0x%04X\n", reg);
1441                               return -EINVAL;
1442                     }
1443                     if (!(p->cs_flags & RADEON_CS_KEEP_TILING_FLAGS)) {
1444                               if (reloc->tiling_flags & RADEON_TILING_MACRO) {
1445                                         unsigned bankw, bankh, mtaspect, tile_split;
1446 
1447                                         evergreen_tiling_fields(reloc->tiling_flags,
1448                                                                       &bankw, &bankh, &mtaspect,
1449                                                                       &tile_split);
1450                                         ib[idx] |= CB_NUM_BANKS(evergreen_cs_get_num_banks(track->nbanks));
1451                                         ib[idx] |= CB_TILE_SPLIT(tile_split) |
1452                                                      CB_BANK_WIDTH(bankw) |
1453                                                      CB_BANK_HEIGHT(bankh) |
1454                                                      CB_MACRO_TILE_ASPECT(mtaspect);
1455                               }
1456                     }
1457                     tmp = ((reg - CB_COLOR0_ATTRIB) / 0x3c);
1458                     track->cb_color_attrib[tmp] = ib[idx];
1459                     track->cb_dirty = true;
1460                     break;
1461           case CB_COLOR8_ATTRIB:
1462           case CB_COLOR9_ATTRIB:
1463           case CB_COLOR10_ATTRIB:
1464           case CB_COLOR11_ATTRIB:
1465                     r = radeon_cs_packet_next_reloc(p, &reloc, 0);
1466                     if (r) {
1467                               dev_warn(p->dev, "bad SET_CONTEXT_REG "
1468                                                   "0x%04X\n", reg);
1469                               return -EINVAL;
1470                     }
1471                     if (!(p->cs_flags & RADEON_CS_KEEP_TILING_FLAGS)) {
1472                               if (reloc->tiling_flags & RADEON_TILING_MACRO) {
1473                                         unsigned bankw, bankh, mtaspect, tile_split;
1474 
1475                                         evergreen_tiling_fields(reloc->tiling_flags,
1476                                                                       &bankw, &bankh, &mtaspect,
1477                                                                       &tile_split);
1478                                         ib[idx] |= CB_NUM_BANKS(evergreen_cs_get_num_banks(track->nbanks));
1479                                         ib[idx] |= CB_TILE_SPLIT(tile_split) |
1480                                                      CB_BANK_WIDTH(bankw) |
1481                                                      CB_BANK_HEIGHT(bankh) |
1482                                                      CB_MACRO_TILE_ASPECT(mtaspect);
1483                               }
1484                     }
1485                     tmp = ((reg - CB_COLOR8_ATTRIB) / 0x1c) + 8;
1486                     track->cb_color_attrib[tmp] = ib[idx];
1487                     track->cb_dirty = true;
1488                     break;
1489           case CB_COLOR0_FMASK:
1490           case CB_COLOR1_FMASK:
1491           case CB_COLOR2_FMASK:
1492           case CB_COLOR3_FMASK:
1493           case CB_COLOR4_FMASK:
1494           case CB_COLOR5_FMASK:
1495           case CB_COLOR6_FMASK:
1496           case CB_COLOR7_FMASK:
1497                     tmp = (reg - CB_COLOR0_FMASK) / 0x3c;
1498                     r = radeon_cs_packet_next_reloc(p, &reloc, 0);
1499                     if (r) {
1500                               dev_err(p->dev, "bad SET_CONTEXT_REG 0x%04X\n", reg);
1501                               return -EINVAL;
1502                     }
1503                     ib[idx] += (u32)((reloc->gpu_offset >> 8) & 0xffffffff);
1504                     track->cb_color_fmask_bo[tmp] = reloc->robj;
1505                     break;
1506           case CB_COLOR0_CMASK:
1507           case CB_COLOR1_CMASK:
1508           case CB_COLOR2_CMASK:
1509           case CB_COLOR3_CMASK:
1510           case CB_COLOR4_CMASK:
1511           case CB_COLOR5_CMASK:
1512           case CB_COLOR6_CMASK:
1513           case CB_COLOR7_CMASK:
1514                     tmp = (reg - CB_COLOR0_CMASK) / 0x3c;
1515                     r = radeon_cs_packet_next_reloc(p, &reloc, 0);
1516                     if (r) {
1517                               dev_err(p->dev, "bad SET_CONTEXT_REG 0x%04X\n", reg);
1518                               return -EINVAL;
1519                     }
1520                     ib[idx] += (u32)((reloc->gpu_offset >> 8) & 0xffffffff);
1521                     track->cb_color_cmask_bo[tmp] = reloc->robj;
1522                     break;
1523           case CB_COLOR0_FMASK_SLICE:
1524           case CB_COLOR1_FMASK_SLICE:
1525           case CB_COLOR2_FMASK_SLICE:
1526           case CB_COLOR3_FMASK_SLICE:
1527           case CB_COLOR4_FMASK_SLICE:
1528           case CB_COLOR5_FMASK_SLICE:
1529           case CB_COLOR6_FMASK_SLICE:
1530           case CB_COLOR7_FMASK_SLICE:
1531                     tmp = (reg - CB_COLOR0_FMASK_SLICE) / 0x3c;
1532                     track->cb_color_fmask_slice[tmp] = radeon_get_ib_value(p, idx);
1533                     break;
1534           case CB_COLOR0_CMASK_SLICE:
1535           case CB_COLOR1_CMASK_SLICE:
1536           case CB_COLOR2_CMASK_SLICE:
1537           case CB_COLOR3_CMASK_SLICE:
1538           case CB_COLOR4_CMASK_SLICE:
1539           case CB_COLOR5_CMASK_SLICE:
1540           case CB_COLOR6_CMASK_SLICE:
1541           case CB_COLOR7_CMASK_SLICE:
1542                     tmp = (reg - CB_COLOR0_CMASK_SLICE) / 0x3c;
1543                     track->cb_color_cmask_slice[tmp] = radeon_get_ib_value(p, idx);
1544                     break;
1545           case CB_COLOR0_BASE:
1546           case CB_COLOR1_BASE:
1547           case CB_COLOR2_BASE:
1548           case CB_COLOR3_BASE:
1549           case CB_COLOR4_BASE:
1550           case CB_COLOR5_BASE:
1551           case CB_COLOR6_BASE:
1552           case CB_COLOR7_BASE:
1553                     r = radeon_cs_packet_next_reloc(p, &reloc, 0);
1554                     if (r) {
1555                               dev_warn(p->dev, "bad SET_CONTEXT_REG "
1556                                                   "0x%04X\n", reg);
1557                               return -EINVAL;
1558                     }
1559                     tmp = (reg - CB_COLOR0_BASE) / 0x3c;
1560                     track->cb_color_bo_offset[tmp] = radeon_get_ib_value(p, idx);
1561                     ib[idx] += (u32)((reloc->gpu_offset >> 8) & 0xffffffff);
1562                     track->cb_color_bo[tmp] = reloc->robj;
1563                     track->cb_dirty = true;
1564                     break;
1565           case CB_COLOR8_BASE:
1566           case CB_COLOR9_BASE:
1567           case CB_COLOR10_BASE:
1568           case CB_COLOR11_BASE:
1569                     r = radeon_cs_packet_next_reloc(p, &reloc, 0);
1570                     if (r) {
1571                               dev_warn(p->dev, "bad SET_CONTEXT_REG "
1572                                                   "0x%04X\n", reg);
1573                               return -EINVAL;
1574                     }
1575                     tmp = ((reg - CB_COLOR8_BASE) / 0x1c) + 8;
1576                     track->cb_color_bo_offset[tmp] = radeon_get_ib_value(p, idx);
1577                     ib[idx] += (u32)((reloc->gpu_offset >> 8) & 0xffffffff);
1578                     track->cb_color_bo[tmp] = reloc->robj;
1579                     track->cb_dirty = true;
1580                     break;
1581           case DB_HTILE_DATA_BASE:
1582                     r = radeon_cs_packet_next_reloc(p, &reloc, 0);
1583                     if (r) {
1584                               dev_warn(p->dev, "bad SET_CONTEXT_REG "
1585                                                   "0x%04X\n", reg);
1586                               return -EINVAL;
1587                     }
1588                     track->htile_offset = radeon_get_ib_value(p, idx);
1589                     ib[idx] += (u32)((reloc->gpu_offset >> 8) & 0xffffffff);
1590                     track->htile_bo = reloc->robj;
1591                     track->db_dirty = true;
1592                     break;
1593           case DB_HTILE_SURFACE:
1594                     /* 8x8 only */
1595                     track->htile_surface = radeon_get_ib_value(p, idx);
1596                     /* force 8x8 htile width and height */
1597                     ib[idx] |= 3;
1598                     track->db_dirty = true;
1599                     break;
1600           case CB_IMMED0_BASE:
1601           case CB_IMMED1_BASE:
1602           case CB_IMMED2_BASE:
1603           case CB_IMMED3_BASE:
1604           case CB_IMMED4_BASE:
1605           case CB_IMMED5_BASE:
1606           case CB_IMMED6_BASE:
1607           case CB_IMMED7_BASE:
1608           case CB_IMMED8_BASE:
1609           case CB_IMMED9_BASE:
1610           case CB_IMMED10_BASE:
1611           case CB_IMMED11_BASE:
1612           case SQ_PGM_START_FS:
1613           case SQ_PGM_START_ES:
1614           case SQ_PGM_START_VS:
1615           case SQ_PGM_START_GS:
1616           case SQ_PGM_START_PS:
1617           case SQ_PGM_START_HS:
1618           case SQ_PGM_START_LS:
1619           case SQ_CONST_MEM_BASE:
1620           case SQ_ALU_CONST_CACHE_GS_0:
1621           case SQ_ALU_CONST_CACHE_GS_1:
1622           case SQ_ALU_CONST_CACHE_GS_2:
1623           case SQ_ALU_CONST_CACHE_GS_3:
1624           case SQ_ALU_CONST_CACHE_GS_4:
1625           case SQ_ALU_CONST_CACHE_GS_5:
1626           case SQ_ALU_CONST_CACHE_GS_6:
1627           case SQ_ALU_CONST_CACHE_GS_7:
1628           case SQ_ALU_CONST_CACHE_GS_8:
1629           case SQ_ALU_CONST_CACHE_GS_9:
1630           case SQ_ALU_CONST_CACHE_GS_10:
1631           case SQ_ALU_CONST_CACHE_GS_11:
1632           case SQ_ALU_CONST_CACHE_GS_12:
1633           case SQ_ALU_CONST_CACHE_GS_13:
1634           case SQ_ALU_CONST_CACHE_GS_14:
1635           case SQ_ALU_CONST_CACHE_GS_15:
1636           case SQ_ALU_CONST_CACHE_PS_0:
1637           case SQ_ALU_CONST_CACHE_PS_1:
1638           case SQ_ALU_CONST_CACHE_PS_2:
1639           case SQ_ALU_CONST_CACHE_PS_3:
1640           case SQ_ALU_CONST_CACHE_PS_4:
1641           case SQ_ALU_CONST_CACHE_PS_5:
1642           case SQ_ALU_CONST_CACHE_PS_6:
1643           case SQ_ALU_CONST_CACHE_PS_7:
1644           case SQ_ALU_CONST_CACHE_PS_8:
1645           case SQ_ALU_CONST_CACHE_PS_9:
1646           case SQ_ALU_CONST_CACHE_PS_10:
1647           case SQ_ALU_CONST_CACHE_PS_11:
1648           case SQ_ALU_CONST_CACHE_PS_12:
1649           case SQ_ALU_CONST_CACHE_PS_13:
1650           case SQ_ALU_CONST_CACHE_PS_14:
1651           case SQ_ALU_CONST_CACHE_PS_15:
1652           case SQ_ALU_CONST_CACHE_VS_0:
1653           case SQ_ALU_CONST_CACHE_VS_1:
1654           case SQ_ALU_CONST_CACHE_VS_2:
1655           case SQ_ALU_CONST_CACHE_VS_3:
1656           case SQ_ALU_CONST_CACHE_VS_4:
1657           case SQ_ALU_CONST_CACHE_VS_5:
1658           case SQ_ALU_CONST_CACHE_VS_6:
1659           case SQ_ALU_CONST_CACHE_VS_7:
1660           case SQ_ALU_CONST_CACHE_VS_8:
1661           case SQ_ALU_CONST_CACHE_VS_9:
1662           case SQ_ALU_CONST_CACHE_VS_10:
1663           case SQ_ALU_CONST_CACHE_VS_11:
1664           case SQ_ALU_CONST_CACHE_VS_12:
1665           case SQ_ALU_CONST_CACHE_VS_13:
1666           case SQ_ALU_CONST_CACHE_VS_14:
1667           case SQ_ALU_CONST_CACHE_VS_15:
1668           case SQ_ALU_CONST_CACHE_HS_0:
1669           case SQ_ALU_CONST_CACHE_HS_1:
1670           case SQ_ALU_CONST_CACHE_HS_2:
1671           case SQ_ALU_CONST_CACHE_HS_3:
1672           case SQ_ALU_CONST_CACHE_HS_4:
1673           case SQ_ALU_CONST_CACHE_HS_5:
1674           case SQ_ALU_CONST_CACHE_HS_6:
1675           case SQ_ALU_CONST_CACHE_HS_7:
1676           case SQ_ALU_CONST_CACHE_HS_8:
1677           case SQ_ALU_CONST_CACHE_HS_9:
1678           case SQ_ALU_CONST_CACHE_HS_10:
1679           case SQ_ALU_CONST_CACHE_HS_11:
1680           case SQ_ALU_CONST_CACHE_HS_12:
1681           case SQ_ALU_CONST_CACHE_HS_13:
1682           case SQ_ALU_CONST_CACHE_HS_14:
1683           case SQ_ALU_CONST_CACHE_HS_15:
1684           case SQ_ALU_CONST_CACHE_LS_0:
1685           case SQ_ALU_CONST_CACHE_LS_1:
1686           case SQ_ALU_CONST_CACHE_LS_2:
1687           case SQ_ALU_CONST_CACHE_LS_3:
1688           case SQ_ALU_CONST_CACHE_LS_4:
1689           case SQ_ALU_CONST_CACHE_LS_5:
1690           case SQ_ALU_CONST_CACHE_LS_6:
1691           case SQ_ALU_CONST_CACHE_LS_7:
1692           case SQ_ALU_CONST_CACHE_LS_8:
1693           case SQ_ALU_CONST_CACHE_LS_9:
1694           case SQ_ALU_CONST_CACHE_LS_10:
1695           case SQ_ALU_CONST_CACHE_LS_11:
1696           case SQ_ALU_CONST_CACHE_LS_12:
1697           case SQ_ALU_CONST_CACHE_LS_13:
1698           case SQ_ALU_CONST_CACHE_LS_14:
1699           case SQ_ALU_CONST_CACHE_LS_15:
1700                     r = radeon_cs_packet_next_reloc(p, &reloc, 0);
1701                     if (r) {
1702                               dev_warn(p->dev, "bad SET_CONTEXT_REG "
1703                                                   "0x%04X\n", reg);
1704                               return -EINVAL;
1705                     }
1706                     ib[idx] += (u32)((reloc->gpu_offset >> 8) & 0xffffffff);
1707                     break;
1708           case SX_MEMORY_EXPORT_BASE:
1709                     if (p->rdev->family >= CHIP_CAYMAN) {
1710                               dev_warn(p->dev, "bad SET_CONFIG_REG "
1711                                          "0x%04X\n", reg);
1712                               return -EINVAL;
1713                     }
1714                     r = radeon_cs_packet_next_reloc(p, &reloc, 0);
1715                     if (r) {
1716                               dev_warn(p->dev, "bad SET_CONFIG_REG "
1717                                                   "0x%04X\n", reg);
1718                               return -EINVAL;
1719                     }
1720                     ib[idx] += (u32)((reloc->gpu_offset >> 8) & 0xffffffff);
1721                     break;
1722           case CAYMAN_SX_SCATTER_EXPORT_BASE:
1723                     if (p->rdev->family < CHIP_CAYMAN) {
1724                               dev_warn(p->dev, "bad SET_CONTEXT_REG "
1725                                          "0x%04X\n", reg);
1726                               return -EINVAL;
1727                     }
1728                     r = radeon_cs_packet_next_reloc(p, &reloc, 0);
1729                     if (r) {
1730                               dev_warn(p->dev, "bad SET_CONTEXT_REG "
1731                                                   "0x%04X\n", reg);
1732                               return -EINVAL;
1733                     }
1734                     ib[idx] += (u32)((reloc->gpu_offset >> 8) & 0xffffffff);
1735                     break;
1736           case SX_MISC:
1737                     track->sx_misc_kill_all_prims = (radeon_get_ib_value(p, idx) & 0x1) != 0;
1738                     break;
1739           default:
1740                     dev_warn(p->dev, "forbidden register 0x%08x at %d\n", reg, idx);
1741                     return -EINVAL;
1742           }
1743           return 0;
1744 }
1745 
1746 /**
1747  * evergreen_is_safe_reg() - check if register is authorized or not
1748  * @parser: parser structure holding parsing context
1749  * @reg: register we are testing
1750  *
1751  * This function will test against reg_safe_bm and return true
1752  * if register is safe or false otherwise.
1753  */
evergreen_is_safe_reg(struct radeon_cs_parser * p,u32 reg)1754 static inline bool evergreen_is_safe_reg(struct radeon_cs_parser *p, u32 reg)
1755 {
1756           struct evergreen_cs_track *track = p->track;
1757           u32 m, i;
1758 
1759           i = (reg >> 7);
1760           if (unlikely(i >= REG_SAFE_BM_SIZE)) {
1761                     return false;
1762           }
1763           m = 1 << ((reg >> 2) & 31);
1764           if (!(track->reg_safe_bm[i] & m))
1765                     return true;
1766 
1767           return false;
1768 }
1769 
evergreen_packet3_check(struct radeon_cs_parser * p,struct radeon_cs_packet * pkt)1770 static int evergreen_packet3_check(struct radeon_cs_parser *p,
1771                                            struct radeon_cs_packet *pkt)
1772 {
1773           struct radeon_bo_list *reloc;
1774           struct evergreen_cs_track *track;
1775           uint32_t *ib;
1776           unsigned idx;
1777           unsigned i;
1778           unsigned start_reg, end_reg, reg;
1779           int r;
1780           u32 idx_value;
1781 
1782           track = (struct evergreen_cs_track *)p->track;
1783           ib = p->ib.ptr;
1784           idx = pkt->idx + 1;
1785           idx_value = radeon_get_ib_value(p, idx);
1786 
1787           switch (pkt->opcode) {
1788           case PACKET3_SET_PREDICATION:
1789           {
1790                     int pred_op;
1791                     int tmp;
1792                     uint64_t offset;
1793 
1794                     if (pkt->count != 1) {
1795                               DRM_ERROR("bad SET PREDICATION\n");
1796                               return -EINVAL;
1797                     }
1798 
1799                     tmp = radeon_get_ib_value(p, idx + 1);
1800                     pred_op = (tmp >> 16) & 0x7;
1801 
1802                     /* for the clear predicate operation */
1803                     if (pred_op == 0)
1804                               return 0;
1805 
1806                     if (pred_op > 2) {
1807                               DRM_ERROR("bad SET PREDICATION operation %d\n", pred_op);
1808                               return -EINVAL;
1809                     }
1810 
1811                     r = radeon_cs_packet_next_reloc(p, &reloc, 0);
1812                     if (r) {
1813                               DRM_ERROR("bad SET PREDICATION\n");
1814                               return -EINVAL;
1815                     }
1816 
1817                     offset = reloc->gpu_offset +
1818                                (idx_value & 0xfffffff0) +
1819                                ((u64)(tmp & 0xff) << 32);
1820 
1821                     ib[idx + 0] = offset;
1822                     ib[idx + 1] = (tmp & 0xffffff00) | (upper_32_bits(offset) & 0xff);
1823           }
1824           break;
1825           case PACKET3_CONTEXT_CONTROL:
1826                     if (pkt->count != 1) {
1827                               DRM_ERROR("bad CONTEXT_CONTROL\n");
1828                               return -EINVAL;
1829                     }
1830                     break;
1831           case PACKET3_INDEX_TYPE:
1832           case PACKET3_NUM_INSTANCES:
1833           case PACKET3_CLEAR_STATE:
1834                     if (pkt->count) {
1835                               DRM_ERROR("bad INDEX_TYPE/NUM_INSTANCES/CLEAR_STATE\n");
1836                               return -EINVAL;
1837                     }
1838                     break;
1839           case CAYMAN_PACKET3_DEALLOC_STATE:
1840                     if (p->rdev->family < CHIP_CAYMAN) {
1841                               DRM_ERROR("bad PACKET3_DEALLOC_STATE\n");
1842                               return -EINVAL;
1843                     }
1844                     if (pkt->count) {
1845                               DRM_ERROR("bad INDEX_TYPE/NUM_INSTANCES/CLEAR_STATE\n");
1846                               return -EINVAL;
1847                     }
1848                     break;
1849           case PACKET3_INDEX_BASE:
1850           {
1851                     uint64_t offset;
1852 
1853                     if (pkt->count != 1) {
1854                               DRM_ERROR("bad INDEX_BASE\n");
1855                               return -EINVAL;
1856                     }
1857                     r = radeon_cs_packet_next_reloc(p, &reloc, 0);
1858                     if (r) {
1859                               DRM_ERROR("bad INDEX_BASE\n");
1860                               return -EINVAL;
1861                     }
1862 
1863                     offset = reloc->gpu_offset +
1864                                idx_value +
1865                                ((u64)(radeon_get_ib_value(p, idx+1) & 0xff) << 32);
1866 
1867                     ib[idx+0] = offset;
1868                     ib[idx+1] = upper_32_bits(offset) & 0xff;
1869 
1870                     r = evergreen_cs_track_check(p);
1871                     if (r) {
1872                               dev_warn(p->dev, "%s:%d invalid cmd stream\n", __func__, __LINE__);
1873                               return r;
1874                     }
1875                     break;
1876           }
1877           case PACKET3_INDEX_BUFFER_SIZE:
1878           {
1879                     if (pkt->count != 0) {
1880                               DRM_ERROR("bad INDEX_BUFFER_SIZE\n");
1881                               return -EINVAL;
1882                     }
1883                     break;
1884           }
1885           case PACKET3_DRAW_INDEX:
1886           {
1887                     uint64_t offset;
1888                     if (pkt->count != 3) {
1889                               DRM_ERROR("bad DRAW_INDEX\n");
1890                               return -EINVAL;
1891                     }
1892                     r = radeon_cs_packet_next_reloc(p, &reloc, 0);
1893                     if (r) {
1894                               DRM_ERROR("bad DRAW_INDEX\n");
1895                               return -EINVAL;
1896                     }
1897 
1898                     offset = reloc->gpu_offset +
1899                                idx_value +
1900                                ((u64)(radeon_get_ib_value(p, idx+1) & 0xff) << 32);
1901 
1902                     ib[idx+0] = offset;
1903                     ib[idx+1] = upper_32_bits(offset) & 0xff;
1904 
1905                     r = evergreen_cs_track_check(p);
1906                     if (r) {
1907                               dev_warn(p->dev, "%s:%d invalid cmd stream\n", __func__, __LINE__);
1908                               return r;
1909                     }
1910                     break;
1911           }
1912           case PACKET3_DRAW_INDEX_2:
1913           {
1914                     uint64_t offset;
1915 
1916                     if (pkt->count != 4) {
1917                               DRM_ERROR("bad DRAW_INDEX_2\n");
1918                               return -EINVAL;
1919                     }
1920                     r = radeon_cs_packet_next_reloc(p, &reloc, 0);
1921                     if (r) {
1922                               DRM_ERROR("bad DRAW_INDEX_2\n");
1923                               return -EINVAL;
1924                     }
1925 
1926                     offset = reloc->gpu_offset +
1927                                radeon_get_ib_value(p, idx+1) +
1928                                ((u64)(radeon_get_ib_value(p, idx+2) & 0xff) << 32);
1929 
1930                     ib[idx+1] = offset;
1931                     ib[idx+2] = upper_32_bits(offset) & 0xff;
1932 
1933                     r = evergreen_cs_track_check(p);
1934                     if (r) {
1935                               dev_warn(p->dev, "%s:%d invalid cmd stream\n", __func__, __LINE__);
1936                               return r;
1937                     }
1938                     break;
1939           }
1940           case PACKET3_DRAW_INDEX_AUTO:
1941                     if (pkt->count != 1) {
1942                               DRM_ERROR("bad DRAW_INDEX_AUTO\n");
1943                               return -EINVAL;
1944                     }
1945                     r = evergreen_cs_track_check(p);
1946                     if (r) {
1947                               dev_warn(p->dev, "%s:%d invalid cmd stream %d\n", __func__, __LINE__, idx);
1948                               return r;
1949                     }
1950                     break;
1951           case PACKET3_DRAW_INDEX_MULTI_AUTO:
1952                     if (pkt->count != 2) {
1953                               DRM_ERROR("bad DRAW_INDEX_MULTI_AUTO\n");
1954                               return -EINVAL;
1955                     }
1956                     r = evergreen_cs_track_check(p);
1957                     if (r) {
1958                               dev_warn(p->dev, "%s:%d invalid cmd stream %d\n", __func__, __LINE__, idx);
1959                               return r;
1960                     }
1961                     break;
1962           case PACKET3_DRAW_INDEX_IMMD:
1963                     if (pkt->count < 2) {
1964                               DRM_ERROR("bad DRAW_INDEX_IMMD\n");
1965                               return -EINVAL;
1966                     }
1967                     r = evergreen_cs_track_check(p);
1968                     if (r) {
1969                               dev_warn(p->dev, "%s:%d invalid cmd stream\n", __func__, __LINE__);
1970                               return r;
1971                     }
1972                     break;
1973           case PACKET3_DRAW_INDEX_OFFSET:
1974                     if (pkt->count != 2) {
1975                               DRM_ERROR("bad DRAW_INDEX_OFFSET\n");
1976                               return -EINVAL;
1977                     }
1978                     r = evergreen_cs_track_check(p);
1979                     if (r) {
1980                               dev_warn(p->dev, "%s:%d invalid cmd stream\n", __func__, __LINE__);
1981                               return r;
1982                     }
1983                     break;
1984           case PACKET3_DRAW_INDEX_OFFSET_2:
1985                     if (pkt->count != 3) {
1986                               DRM_ERROR("bad DRAW_INDEX_OFFSET_2\n");
1987                               return -EINVAL;
1988                     }
1989                     r = evergreen_cs_track_check(p);
1990                     if (r) {
1991                               dev_warn(p->dev, "%s:%d invalid cmd stream\n", __func__, __LINE__);
1992                               return r;
1993                     }
1994                     break;
1995           case PACKET3_SET_BASE:
1996           {
1997                     /*
1998                     DW 1 HEADER Header of the packet. Shader_Type in bit 1 of the Header will correspond to the shader type of the Load, see Type-3 Packet.
1999                        2 BASE_INDEX Bits [3:0] BASE_INDEX - Base Index specifies which base address is specified in the last two DWs.
2000                          0001: DX11 Draw_Index_Indirect Patch Table Base: Base address for Draw_Index_Indirect data.
2001                        3 ADDRESS_LO Bits [31:3] - Lower bits of QWORD-Aligned Address. Bits [2:0] - Reserved
2002                        4 ADDRESS_HI Bits [31:8] - Reserved. Bits [7:0] - Upper bits of Address [47:32]
2003                     */
2004                     if (pkt->count != 2) {
2005                               DRM_ERROR("bad SET_BASE\n");
2006                               return -EINVAL;
2007                     }
2008 
2009                     /* currently only supporting setting indirect draw buffer base address */
2010                     if (idx_value != 1) {
2011                               DRM_ERROR("bad SET_BASE\n");
2012                               return -EINVAL;
2013                     }
2014 
2015                     r = radeon_cs_packet_next_reloc(p, &reloc, 0);
2016                     if (r) {
2017                               DRM_ERROR("bad SET_BASE\n");
2018                               return -EINVAL;
2019                     }
2020 
2021                     track->indirect_draw_buffer_size = radeon_bo_size(reloc->robj);
2022 
2023                     ib[idx+1] = reloc->gpu_offset;
2024                     ib[idx+2] = upper_32_bits(reloc->gpu_offset) & 0xff;
2025 
2026                     break;
2027           }
2028           case PACKET3_DRAW_INDIRECT:
2029           case PACKET3_DRAW_INDEX_INDIRECT:
2030           {
2031                     u64 size = pkt->opcode == PACKET3_DRAW_INDIRECT ? 16 : 20;
2032 
2033                     /*
2034                     DW 1 HEADER
2035                        2 DATA_OFFSET Bits [31:0] + byte aligned offset where the required data structure starts. Bits 1:0 are zero
2036                        3 DRAW_INITIATOR Draw Initiator Register. Written to the VGT_DRAW_INITIATOR register for the assigned context
2037                     */
2038                     if (pkt->count != 1) {
2039                               DRM_ERROR("bad DRAW_INDIRECT\n");
2040                               return -EINVAL;
2041                     }
2042 
2043                     if (idx_value + size > track->indirect_draw_buffer_size) {
2044                               dev_warn(p->dev, "DRAW_INDIRECT buffer too small %u + %llu > %lu\n",
2045                                         idx_value, size, track->indirect_draw_buffer_size);
2046                               return -EINVAL;
2047                     }
2048 
2049                     r = evergreen_cs_track_check(p);
2050                     if (r) {
2051                               dev_warn(p->dev, "%s:%d invalid cmd stream\n", __func__, __LINE__);
2052                               return r;
2053                     }
2054                     break;
2055           }
2056           case PACKET3_DISPATCH_DIRECT:
2057                     if (pkt->count != 3) {
2058                               DRM_ERROR("bad DISPATCH_DIRECT\n");
2059                               return -EINVAL;
2060                     }
2061                     r = evergreen_cs_track_check(p);
2062                     if (r) {
2063                               dev_warn(p->dev, "%s:%d invalid cmd stream %d\n", __func__, __LINE__, idx);
2064                               return r;
2065                     }
2066                     break;
2067           case PACKET3_DISPATCH_INDIRECT:
2068                     if (pkt->count != 1) {
2069                               DRM_ERROR("bad DISPATCH_INDIRECT\n");
2070                               return -EINVAL;
2071                     }
2072                     r = radeon_cs_packet_next_reloc(p, &reloc, 0);
2073                     if (r) {
2074                               DRM_ERROR("bad DISPATCH_INDIRECT\n");
2075                               return -EINVAL;
2076                     }
2077                     ib[idx+0] = idx_value + (u32)(reloc->gpu_offset & 0xffffffff);
2078                     r = evergreen_cs_track_check(p);
2079                     if (r) {
2080                               dev_warn(p->dev, "%s:%d invalid cmd stream\n", __func__, __LINE__);
2081                               return r;
2082                     }
2083                     break;
2084           case PACKET3_WAIT_REG_MEM:
2085                     if (pkt->count != 5) {
2086                               DRM_ERROR("bad WAIT_REG_MEM\n");
2087                               return -EINVAL;
2088                     }
2089                     /* bit 4 is reg (0) or mem (1) */
2090                     if (idx_value & 0x10) {
2091                               uint64_t offset;
2092 
2093                               r = radeon_cs_packet_next_reloc(p, &reloc, 0);
2094                               if (r) {
2095                                         DRM_ERROR("bad WAIT_REG_MEM\n");
2096                                         return -EINVAL;
2097                               }
2098 
2099                               offset = reloc->gpu_offset +
2100                                          (radeon_get_ib_value(p, idx+1) & 0xfffffffc) +
2101                                          ((u64)(radeon_get_ib_value(p, idx+2) & 0xff) << 32);
2102 
2103                               ib[idx+1] = (ib[idx+1] & 0x3) | (offset & 0xfffffffc);
2104                               ib[idx+2] = upper_32_bits(offset) & 0xff;
2105                     } else if (idx_value & 0x100) {
2106                               DRM_ERROR("cannot use PFP on REG wait\n");
2107                               return -EINVAL;
2108                     }
2109                     break;
2110           case PACKET3_CP_DMA:
2111           {
2112                     u32 command, size, info;
2113                     u64 offset, tmp;
2114                     if (pkt->count != 4) {
2115                               DRM_ERROR("bad CP DMA\n");
2116                               return -EINVAL;
2117                     }
2118                     command = radeon_get_ib_value(p, idx+4);
2119                     size = command & 0x1fffff;
2120                     info = radeon_get_ib_value(p, idx+1);
2121                     if ((((info & 0x60000000) >> 29) != 0) || /* src = GDS or DATA */
2122                         (((info & 0x00300000) >> 20) != 0) || /* dst = GDS */
2123                         ((((info & 0x00300000) >> 20) == 0) &&
2124                          (command & PACKET3_CP_DMA_CMD_DAS)) || /* dst = register */
2125                         ((((info & 0x60000000) >> 29) == 0) &&
2126                          (command & PACKET3_CP_DMA_CMD_SAS))) { /* src = register */
2127                               /* non mem to mem copies requires dw aligned count */
2128                               if (size % 4) {
2129                                         DRM_ERROR("CP DMA command requires dw count alignment\n");
2130                                         return -EINVAL;
2131                               }
2132                     }
2133                     if (command & PACKET3_CP_DMA_CMD_SAS) {
2134                               /* src address space is register */
2135                               /* GDS is ok */
2136                               if (((info & 0x60000000) >> 29) != 1) {
2137                                         DRM_ERROR("CP DMA SAS not supported\n");
2138                                         return -EINVAL;
2139                               }
2140                     } else {
2141                               if (command & PACKET3_CP_DMA_CMD_SAIC) {
2142                                         DRM_ERROR("CP DMA SAIC only supported for registers\n");
2143                                         return -EINVAL;
2144                               }
2145                               /* src address space is memory */
2146                               if (((info & 0x60000000) >> 29) == 0) {
2147                                         r = radeon_cs_packet_next_reloc(p, &reloc, 0);
2148                                         if (r) {
2149                                                   DRM_ERROR("bad CP DMA SRC\n");
2150                                                   return -EINVAL;
2151                                         }
2152 
2153                                         tmp = radeon_get_ib_value(p, idx) +
2154                                                   ((u64)(radeon_get_ib_value(p, idx+1) & 0xff) << 32);
2155 
2156                                         offset = reloc->gpu_offset + tmp;
2157 
2158                                         if ((tmp + size) > radeon_bo_size(reloc->robj)) {
2159                                                   dev_warn(p->dev, "CP DMA src buffer too small (%llu %lu)\n",
2160                                                              tmp + size, radeon_bo_size(reloc->robj));
2161                                                   return -EINVAL;
2162                                         }
2163 
2164                                         ib[idx] = offset;
2165                                         ib[idx+1] = (ib[idx+1] & 0xffffff00) | (upper_32_bits(offset) & 0xff);
2166                               } else if (((info & 0x60000000) >> 29) != 2) {
2167                                         DRM_ERROR("bad CP DMA SRC_SEL\n");
2168                                         return -EINVAL;
2169                               }
2170                     }
2171                     if (command & PACKET3_CP_DMA_CMD_DAS) {
2172                               /* dst address space is register */
2173                               /* GDS is ok */
2174                               if (((info & 0x00300000) >> 20) != 1) {
2175                                         DRM_ERROR("CP DMA DAS not supported\n");
2176                                         return -EINVAL;
2177                               }
2178                     } else {
2179                               /* dst address space is memory */
2180                               if (command & PACKET3_CP_DMA_CMD_DAIC) {
2181                                         DRM_ERROR("CP DMA DAIC only supported for registers\n");
2182                                         return -EINVAL;
2183                               }
2184                               if (((info & 0x00300000) >> 20) == 0) {
2185                                         r = radeon_cs_packet_next_reloc(p, &reloc, 0);
2186                                         if (r) {
2187                                                   DRM_ERROR("bad CP DMA DST\n");
2188                                                   return -EINVAL;
2189                                         }
2190 
2191                                         tmp = radeon_get_ib_value(p, idx+2) +
2192                                                   ((u64)(radeon_get_ib_value(p, idx+3) & 0xff) << 32);
2193 
2194                                         offset = reloc->gpu_offset + tmp;
2195 
2196                                         if ((tmp + size) > radeon_bo_size(reloc->robj)) {
2197                                                   dev_warn(p->dev, "CP DMA dst buffer too small (%llu %lu)\n",
2198                                                              tmp + size, radeon_bo_size(reloc->robj));
2199                                                   return -EINVAL;
2200                                         }
2201 
2202                                         ib[idx+2] = offset;
2203                                         ib[idx+3] = upper_32_bits(offset) & 0xff;
2204                               } else {
2205                                         DRM_ERROR("bad CP DMA DST_SEL\n");
2206                                         return -EINVAL;
2207                               }
2208                     }
2209                     break;
2210           }
2211           case PACKET3_PFP_SYNC_ME:
2212                     if (pkt->count) {
2213                               DRM_ERROR("bad PFP_SYNC_ME\n");
2214                               return -EINVAL;
2215                     }
2216                     break;
2217           case PACKET3_SURFACE_SYNC:
2218                     if (pkt->count != 3) {
2219                               DRM_ERROR("bad SURFACE_SYNC\n");
2220                               return -EINVAL;
2221                     }
2222                     /* 0xffffffff/0x0 is flush all cache flag */
2223                     if (radeon_get_ib_value(p, idx + 1) != 0xffffffff ||
2224                         radeon_get_ib_value(p, idx + 2) != 0) {
2225                               r = radeon_cs_packet_next_reloc(p, &reloc, 0);
2226                               if (r) {
2227                                         DRM_ERROR("bad SURFACE_SYNC\n");
2228                                         return -EINVAL;
2229                               }
2230                               ib[idx+2] += (u32)((reloc->gpu_offset >> 8) & 0xffffffff);
2231                     }
2232                     break;
2233           case PACKET3_EVENT_WRITE:
2234                     if (pkt->count != 2 && pkt->count != 0) {
2235                               DRM_ERROR("bad EVENT_WRITE\n");
2236                               return -EINVAL;
2237                     }
2238                     if (pkt->count) {
2239                               uint64_t offset;
2240 
2241                               r = radeon_cs_packet_next_reloc(p, &reloc, 0);
2242                               if (r) {
2243                                         DRM_ERROR("bad EVENT_WRITE\n");
2244                                         return -EINVAL;
2245                               }
2246                               offset = reloc->gpu_offset +
2247                                          (radeon_get_ib_value(p, idx+1) & 0xfffffff8) +
2248                                          ((u64)(radeon_get_ib_value(p, idx+2) & 0xff) << 32);
2249 
2250                               ib[idx+1] = offset & 0xfffffff8;
2251                               ib[idx+2] = upper_32_bits(offset) & 0xff;
2252                     }
2253                     break;
2254           case PACKET3_EVENT_WRITE_EOP:
2255           {
2256                     uint64_t offset;
2257 
2258                     if (pkt->count != 4) {
2259                               DRM_ERROR("bad EVENT_WRITE_EOP\n");
2260                               return -EINVAL;
2261                     }
2262                     r = radeon_cs_packet_next_reloc(p, &reloc, 0);
2263                     if (r) {
2264                               DRM_ERROR("bad EVENT_WRITE_EOP\n");
2265                               return -EINVAL;
2266                     }
2267 
2268                     offset = reloc->gpu_offset +
2269                                (radeon_get_ib_value(p, idx+1) & 0xfffffffc) +
2270                                ((u64)(radeon_get_ib_value(p, idx+2) & 0xff) << 32);
2271 
2272                     ib[idx+1] = offset & 0xfffffffc;
2273                     ib[idx+2] = (ib[idx+2] & 0xffffff00) | (upper_32_bits(offset) & 0xff);
2274                     break;
2275           }
2276           case PACKET3_EVENT_WRITE_EOS:
2277           {
2278                     uint64_t offset;
2279 
2280                     if (pkt->count != 3) {
2281                               DRM_ERROR("bad EVENT_WRITE_EOS\n");
2282                               return -EINVAL;
2283                     }
2284                     r = radeon_cs_packet_next_reloc(p, &reloc, 0);
2285                     if (r) {
2286                               DRM_ERROR("bad EVENT_WRITE_EOS\n");
2287                               return -EINVAL;
2288                     }
2289 
2290                     offset = reloc->gpu_offset +
2291                                (radeon_get_ib_value(p, idx+1) & 0xfffffffc) +
2292                                ((u64)(radeon_get_ib_value(p, idx+2) & 0xff) << 32);
2293 
2294                     ib[idx+1] = offset & 0xfffffffc;
2295                     ib[idx+2] = (ib[idx+2] & 0xffffff00) | (upper_32_bits(offset) & 0xff);
2296                     break;
2297           }
2298           case PACKET3_SET_CONFIG_REG:
2299                     start_reg = (idx_value << 2) + PACKET3_SET_CONFIG_REG_START;
2300                     end_reg = 4 * pkt->count + start_reg - 4;
2301                     if ((start_reg < PACKET3_SET_CONFIG_REG_START) ||
2302                         (start_reg >= PACKET3_SET_CONFIG_REG_END) ||
2303                         (end_reg >= PACKET3_SET_CONFIG_REG_END)) {
2304                               DRM_ERROR("bad PACKET3_SET_CONFIG_REG\n");
2305                               return -EINVAL;
2306                     }
2307                     for (reg = start_reg, idx++; reg <= end_reg; reg += 4, idx++) {
2308                               if (evergreen_is_safe_reg(p, reg))
2309                                         continue;
2310                               r = evergreen_cs_handle_reg(p, reg, idx);
2311                               if (r)
2312                                         return r;
2313                     }
2314                     break;
2315           case PACKET3_SET_CONTEXT_REG:
2316                     start_reg = (idx_value << 2) + PACKET3_SET_CONTEXT_REG_START;
2317                     end_reg = 4 * pkt->count + start_reg - 4;
2318                     if ((start_reg < PACKET3_SET_CONTEXT_REG_START) ||
2319                         (start_reg >= PACKET3_SET_CONTEXT_REG_END) ||
2320                         (end_reg >= PACKET3_SET_CONTEXT_REG_END)) {
2321                               DRM_ERROR("bad PACKET3_SET_CONTEXT_REG\n");
2322                               return -EINVAL;
2323                     }
2324                     for (reg = start_reg, idx++; reg <= end_reg; reg += 4, idx++) {
2325                               if (evergreen_is_safe_reg(p, reg))
2326                                         continue;
2327                               r = evergreen_cs_handle_reg(p, reg, idx);
2328                               if (r)
2329                                         return r;
2330                     }
2331                     break;
2332           case PACKET3_SET_RESOURCE:
2333                     if (pkt->count % 8) {
2334                               DRM_ERROR("bad SET_RESOURCE\n");
2335                               return -EINVAL;
2336                     }
2337                     start_reg = (idx_value << 2) + PACKET3_SET_RESOURCE_START;
2338                     end_reg = 4 * pkt->count + start_reg - 4;
2339                     if ((start_reg < PACKET3_SET_RESOURCE_START) ||
2340                         (start_reg >= PACKET3_SET_RESOURCE_END) ||
2341                         (end_reg >= PACKET3_SET_RESOURCE_END)) {
2342                               DRM_ERROR("bad SET_RESOURCE\n");
2343                               return -EINVAL;
2344                     }
2345                     for (i = 0; i < (pkt->count / 8); i++) {
2346                               struct radeon_bo *texture, *mipmap;
2347                               u32 toffset, moffset;
2348                               u32 size, offset, mip_address, tex_dim;
2349 
2350                               switch (G__SQ_CONSTANT_TYPE(radeon_get_ib_value(p, idx+1+(i*8)+7))) {
2351                               case SQ_TEX_VTX_VALID_TEXTURE:
2352                                         /* tex base */
2353                                         r = radeon_cs_packet_next_reloc(p, &reloc, 0);
2354                                         if (r) {
2355                                                   DRM_ERROR("bad SET_RESOURCE (tex)\n");
2356                                                   return -EINVAL;
2357                                         }
2358                                         if (!(p->cs_flags & RADEON_CS_KEEP_TILING_FLAGS)) {
2359                                                   ib[idx+1+(i*8)+1] |=
2360                                                             TEX_ARRAY_MODE(evergreen_cs_get_aray_mode(reloc->tiling_flags));
2361                                                   if (reloc->tiling_flags & RADEON_TILING_MACRO) {
2362                                                             unsigned bankw, bankh, mtaspect, tile_split;
2363 
2364                                                             evergreen_tiling_fields(reloc->tiling_flags,
2365                                                                                           &bankw, &bankh, &mtaspect,
2366                                                                                           &tile_split);
2367                                                             ib[idx+1+(i*8)+6] |= TEX_TILE_SPLIT(tile_split);
2368                                                             ib[idx+1+(i*8)+7] |=
2369                                                                       TEX_BANK_WIDTH(bankw) |
2370                                                                       TEX_BANK_HEIGHT(bankh) |
2371                                                                       MACRO_TILE_ASPECT(mtaspect) |
2372                                                                       TEX_NUM_BANKS(evergreen_cs_get_num_banks(track->nbanks));
2373                                                   }
2374                                         }
2375                                         texture = reloc->robj;
2376                                         toffset = (u32)((reloc->gpu_offset >> 8) & 0xffffffff);
2377 
2378                                         /* tex mip base */
2379                                         tex_dim = ib[idx+1+(i*8)+0] & 0x7;
2380                                         mip_address = ib[idx+1+(i*8)+3];
2381 
2382                                         if ((tex_dim == SQ_TEX_DIM_2D_MSAA || tex_dim == SQ_TEX_DIM_2D_ARRAY_MSAA) &&
2383                                             !mip_address &&
2384                                             !radeon_cs_packet_next_is_pkt3_nop(p)) {
2385                                                   /* MIP_ADDRESS should point to FMASK for an MSAA texture.
2386                                                    * It should be 0 if FMASK is disabled. */
2387                                                   moffset = 0;
2388                                                   mipmap = NULL;
2389                                         } else {
2390                                                   r = radeon_cs_packet_next_reloc(p, &reloc, 0);
2391                                                   if (r) {
2392                                                             DRM_ERROR("bad SET_RESOURCE (tex)\n");
2393                                                             return -EINVAL;
2394                                                   }
2395                                                   moffset = (u32)((reloc->gpu_offset >> 8) & 0xffffffff);
2396                                                   mipmap = reloc->robj;
2397                                         }
2398 
2399                                         r = evergreen_cs_track_validate_texture(p, texture, mipmap, idx+1+(i*8));
2400                                         if (r)
2401                                                   return r;
2402                                         ib[idx+1+(i*8)+2] += toffset;
2403                                         ib[idx+1+(i*8)+3] += moffset;
2404                                         break;
2405                               case SQ_TEX_VTX_VALID_BUFFER:
2406                               {
2407                                         uint64_t offset64;
2408                                         /* vtx base */
2409                                         r = radeon_cs_packet_next_reloc(p, &reloc, 0);
2410                                         if (r) {
2411                                                   DRM_ERROR("bad SET_RESOURCE (vtx)\n");
2412                                                   return -EINVAL;
2413                                         }
2414                                         offset = radeon_get_ib_value(p, idx+1+(i*8)+0);
2415                                         size = radeon_get_ib_value(p, idx+1+(i*8)+1);
2416                                         if (p->rdev && (size + offset) > radeon_bo_size(reloc->robj)) {
2417                                                   /* force size to size of the buffer */
2418                                                   dev_warn(p->dev, "vbo resource seems too big for the bo\n");
2419                                                   ib[idx+1+(i*8)+1] = radeon_bo_size(reloc->robj) - offset;
2420                                         }
2421 
2422                                         offset64 = reloc->gpu_offset + offset;
2423                                         ib[idx+1+(i*8)+0] = offset64;
2424                                         ib[idx+1+(i*8)+2] = (ib[idx+1+(i*8)+2] & 0xffffff00) |
2425                                                                 (upper_32_bits(offset64) & 0xff);
2426                                         break;
2427                               }
2428                               case SQ_TEX_VTX_INVALID_TEXTURE:
2429                               case SQ_TEX_VTX_INVALID_BUFFER:
2430                               default:
2431                                         DRM_ERROR("bad SET_RESOURCE\n");
2432                                         return -EINVAL;
2433                               }
2434                     }
2435                     break;
2436           case PACKET3_SET_ALU_CONST:
2437                     /* XXX fix me ALU const buffers only */
2438                     break;
2439           case PACKET3_SET_BOOL_CONST:
2440                     start_reg = (idx_value << 2) + PACKET3_SET_BOOL_CONST_START;
2441                     end_reg = 4 * pkt->count + start_reg - 4;
2442                     if ((start_reg < PACKET3_SET_BOOL_CONST_START) ||
2443                         (start_reg >= PACKET3_SET_BOOL_CONST_END) ||
2444                         (end_reg >= PACKET3_SET_BOOL_CONST_END)) {
2445                               DRM_ERROR("bad SET_BOOL_CONST\n");
2446                               return -EINVAL;
2447                     }
2448                     break;
2449           case PACKET3_SET_LOOP_CONST:
2450                     start_reg = (idx_value << 2) + PACKET3_SET_LOOP_CONST_START;
2451                     end_reg = 4 * pkt->count + start_reg - 4;
2452                     if ((start_reg < PACKET3_SET_LOOP_CONST_START) ||
2453                         (start_reg >= PACKET3_SET_LOOP_CONST_END) ||
2454                         (end_reg >= PACKET3_SET_LOOP_CONST_END)) {
2455                               DRM_ERROR("bad SET_LOOP_CONST\n");
2456                               return -EINVAL;
2457                     }
2458                     break;
2459           case PACKET3_SET_CTL_CONST:
2460                     start_reg = (idx_value << 2) + PACKET3_SET_CTL_CONST_START;
2461                     end_reg = 4 * pkt->count + start_reg - 4;
2462                     if ((start_reg < PACKET3_SET_CTL_CONST_START) ||
2463                         (start_reg >= PACKET3_SET_CTL_CONST_END) ||
2464                         (end_reg >= PACKET3_SET_CTL_CONST_END)) {
2465                               DRM_ERROR("bad SET_CTL_CONST\n");
2466                               return -EINVAL;
2467                     }
2468                     break;
2469           case PACKET3_SET_SAMPLER:
2470                     if (pkt->count % 3) {
2471                               DRM_ERROR("bad SET_SAMPLER\n");
2472                               return -EINVAL;
2473                     }
2474                     start_reg = (idx_value << 2) + PACKET3_SET_SAMPLER_START;
2475                     end_reg = 4 * pkt->count + start_reg - 4;
2476                     if ((start_reg < PACKET3_SET_SAMPLER_START) ||
2477                         (start_reg >= PACKET3_SET_SAMPLER_END) ||
2478                         (end_reg >= PACKET3_SET_SAMPLER_END)) {
2479                               DRM_ERROR("bad SET_SAMPLER\n");
2480                               return -EINVAL;
2481                     }
2482                     break;
2483           case PACKET3_STRMOUT_BUFFER_UPDATE:
2484                     if (pkt->count != 4) {
2485                               DRM_ERROR("bad STRMOUT_BUFFER_UPDATE (invalid count)\n");
2486                               return -EINVAL;
2487                     }
2488                     /* Updating memory at DST_ADDRESS. */
2489                     if (idx_value & 0x1) {
2490                               u64 offset;
2491                               r = radeon_cs_packet_next_reloc(p, &reloc, 0);
2492                               if (r) {
2493                                         DRM_ERROR("bad STRMOUT_BUFFER_UPDATE (missing dst reloc)\n");
2494                                         return -EINVAL;
2495                               }
2496                               offset = radeon_get_ib_value(p, idx+1);
2497                               offset += ((u64)(radeon_get_ib_value(p, idx+2) & 0xff)) << 32;
2498                               if ((offset + 4) > radeon_bo_size(reloc->robj)) {
2499                                         DRM_ERROR("bad STRMOUT_BUFFER_UPDATE dst bo too small: 0x%llx, 0x%lx\n",
2500                                                     offset + 4, radeon_bo_size(reloc->robj));
2501                                         return -EINVAL;
2502                               }
2503                               offset += reloc->gpu_offset;
2504                               ib[idx+1] = offset;
2505                               ib[idx+2] = upper_32_bits(offset) & 0xff;
2506                     }
2507                     /* Reading data from SRC_ADDRESS. */
2508                     if (((idx_value >> 1) & 0x3) == 2) {
2509                               u64 offset;
2510                               r = radeon_cs_packet_next_reloc(p, &reloc, 0);
2511                               if (r) {
2512                                         DRM_ERROR("bad STRMOUT_BUFFER_UPDATE (missing src reloc)\n");
2513                                         return -EINVAL;
2514                               }
2515                               offset = radeon_get_ib_value(p, idx+3);
2516                               offset += ((u64)(radeon_get_ib_value(p, idx+4) & 0xff)) << 32;
2517                               if ((offset + 4) > radeon_bo_size(reloc->robj)) {
2518                                         DRM_ERROR("bad STRMOUT_BUFFER_UPDATE src bo too small: 0x%llx, 0x%lx\n",
2519                                                     offset + 4, radeon_bo_size(reloc->robj));
2520                                         return -EINVAL;
2521                               }
2522                               offset += reloc->gpu_offset;
2523                               ib[idx+3] = offset;
2524                               ib[idx+4] = upper_32_bits(offset) & 0xff;
2525                     }
2526                     break;
2527           case PACKET3_MEM_WRITE:
2528           {
2529                     u64 offset;
2530 
2531                     if (pkt->count != 3) {
2532                               DRM_ERROR("bad MEM_WRITE (invalid count)\n");
2533                               return -EINVAL;
2534                     }
2535                     r = radeon_cs_packet_next_reloc(p, &reloc, 0);
2536                     if (r) {
2537                               DRM_ERROR("bad MEM_WRITE (missing reloc)\n");
2538                               return -EINVAL;
2539                     }
2540                     offset = radeon_get_ib_value(p, idx+0);
2541                     offset += ((u64)(radeon_get_ib_value(p, idx+1) & 0xff)) << 32UL;
2542                     if (offset & 0x7) {
2543                               DRM_ERROR("bad MEM_WRITE (address not qwords aligned)\n");
2544                               return -EINVAL;
2545                     }
2546                     if ((offset + 8) > radeon_bo_size(reloc->robj)) {
2547                               DRM_ERROR("bad MEM_WRITE bo too small: 0x%llx, 0x%lx\n",
2548                                           offset + 8, radeon_bo_size(reloc->robj));
2549                               return -EINVAL;
2550                     }
2551                     offset += reloc->gpu_offset;
2552                     ib[idx+0] = offset;
2553                     ib[idx+1] = upper_32_bits(offset) & 0xff;
2554                     break;
2555           }
2556           case PACKET3_COPY_DW:
2557                     if (pkt->count != 4) {
2558                               DRM_ERROR("bad COPY_DW (invalid count)\n");
2559                               return -EINVAL;
2560                     }
2561                     if (idx_value & 0x1) {
2562                               u64 offset;
2563                               /* SRC is memory. */
2564                               r = radeon_cs_packet_next_reloc(p, &reloc, 0);
2565                               if (r) {
2566                                         DRM_ERROR("bad COPY_DW (missing src reloc)\n");
2567                                         return -EINVAL;
2568                               }
2569                               offset = radeon_get_ib_value(p, idx+1);
2570                               offset += ((u64)(radeon_get_ib_value(p, idx+2) & 0xff)) << 32;
2571                               if ((offset + 4) > radeon_bo_size(reloc->robj)) {
2572                                         DRM_ERROR("bad COPY_DW src bo too small: 0x%llx, 0x%lx\n",
2573                                                     offset + 4, radeon_bo_size(reloc->robj));
2574                                         return -EINVAL;
2575                               }
2576                               offset += reloc->gpu_offset;
2577                               ib[idx+1] = offset;
2578                               ib[idx+2] = upper_32_bits(offset) & 0xff;
2579                     } else {
2580                               /* SRC is a reg. */
2581                               reg = radeon_get_ib_value(p, idx+1) << 2;
2582                               if (!evergreen_is_safe_reg(p, reg)) {
2583                                         dev_warn(p->dev, "forbidden register 0x%08x at %d\n",
2584                                                    reg, idx + 1);
2585                                         return -EINVAL;
2586                               }
2587                     }
2588                     if (idx_value & 0x2) {
2589                               u64 offset;
2590                               /* DST is memory. */
2591                               r = radeon_cs_packet_next_reloc(p, &reloc, 0);
2592                               if (r) {
2593                                         DRM_ERROR("bad COPY_DW (missing dst reloc)\n");
2594                                         return -EINVAL;
2595                               }
2596                               offset = radeon_get_ib_value(p, idx+3);
2597                               offset += ((u64)(radeon_get_ib_value(p, idx+4) & 0xff)) << 32;
2598                               if ((offset + 4) > radeon_bo_size(reloc->robj)) {
2599                                         DRM_ERROR("bad COPY_DW dst bo too small: 0x%llx, 0x%lx\n",
2600                                                     offset + 4, radeon_bo_size(reloc->robj));
2601                                         return -EINVAL;
2602                               }
2603                               offset += reloc->gpu_offset;
2604                               ib[idx+3] = offset;
2605                               ib[idx+4] = upper_32_bits(offset) & 0xff;
2606                     } else {
2607                               /* DST is a reg. */
2608                               reg = radeon_get_ib_value(p, idx+3) << 2;
2609                               if (!evergreen_is_safe_reg(p, reg)) {
2610                                         dev_warn(p->dev, "forbidden register 0x%08x at %d\n",
2611                                                    reg, idx + 3);
2612                                         return -EINVAL;
2613                               }
2614                     }
2615                     break;
2616           case PACKET3_SET_APPEND_CNT:
2617           {
2618                     uint32_t areg;
2619                     uint32_t allowed_reg_base;
2620                     uint32_t source_sel;
2621                     if (pkt->count != 2) {
2622                               DRM_ERROR("bad SET_APPEND_CNT (invalid count)\n");
2623                               return -EINVAL;
2624                     }
2625 
2626                     allowed_reg_base = GDS_APPEND_COUNT_0;
2627                     allowed_reg_base -= PACKET3_SET_CONTEXT_REG_START;
2628                     allowed_reg_base >>= 2;
2629 
2630                     areg = idx_value >> 16;
2631                     if (areg < allowed_reg_base || areg > (allowed_reg_base + 11)) {
2632                               dev_warn(p->dev, "forbidden register for append cnt 0x%08x at %d\n",
2633                                          areg, idx);
2634                               return -EINVAL;
2635                     }
2636 
2637                     source_sel = G_PACKET3_SET_APPEND_CNT_SRC_SELECT(idx_value);
2638                     if (source_sel == PACKET3_SAC_SRC_SEL_MEM) {
2639                               uint64_t offset;
2640                               uint32_t swap;
2641                               r = radeon_cs_packet_next_reloc(p, &reloc, 0);
2642                               if (r) {
2643                                         DRM_ERROR("bad SET_APPEND_CNT (missing reloc)\n");
2644                                         return -EINVAL;
2645                               }
2646                               offset = radeon_get_ib_value(p, idx + 1);
2647                               swap = offset & 0x3;
2648                               offset &= ~0x3;
2649 
2650                               offset += ((u64)(radeon_get_ib_value(p, idx + 2) & 0xff)) << 32;
2651 
2652                               offset += reloc->gpu_offset;
2653                               ib[idx+1] = (offset & 0xfffffffc) | swap;
2654                               ib[idx+2] = upper_32_bits(offset) & 0xff;
2655                     } else {
2656                               DRM_ERROR("bad SET_APPEND_CNT (unsupported operation)\n");
2657                               return -EINVAL;
2658                     }
2659                     break;
2660           }
2661           case PACKET3_NOP:
2662                     break;
2663           default:
2664                     DRM_ERROR("Packet3 opcode %x not supported\n", pkt->opcode);
2665                     return -EINVAL;
2666           }
2667           return 0;
2668 }
2669 
evergreen_cs_parse(struct radeon_cs_parser * p)2670 int evergreen_cs_parse(struct radeon_cs_parser *p)
2671 {
2672           struct radeon_cs_packet pkt;
2673           struct evergreen_cs_track *track;
2674           u32 tmp;
2675           int r;
2676 
2677           if (p->track == NULL) {
2678                     /* initialize tracker, we are in kms */
2679                     track = kzalloc(sizeof(*track), GFP_KERNEL);
2680                     if (track == NULL)
2681                               return -ENOMEM;
2682                     evergreen_cs_track_init(track);
2683                     if (p->rdev->family >= CHIP_CAYMAN) {
2684                               tmp = p->rdev->config.cayman.tile_config;
2685                               track->reg_safe_bm = cayman_reg_safe_bm;
2686                     } else {
2687                               tmp = p->rdev->config.evergreen.tile_config;
2688                               track->reg_safe_bm = evergreen_reg_safe_bm;
2689                     }
2690                     BUILD_BUG_ON(ARRAY_SIZE(cayman_reg_safe_bm) != REG_SAFE_BM_SIZE);
2691                     BUILD_BUG_ON(ARRAY_SIZE(evergreen_reg_safe_bm) != REG_SAFE_BM_SIZE);
2692                     switch (tmp & 0xf) {
2693                     case 0:
2694                               track->npipes = 1;
2695                               break;
2696                     case 1:
2697                     default:
2698                               track->npipes = 2;
2699                               break;
2700                     case 2:
2701                               track->npipes = 4;
2702                               break;
2703                     case 3:
2704                               track->npipes = 8;
2705                               break;
2706                     }
2707 
2708                     switch ((tmp & 0xf0) >> 4) {
2709                     case 0:
2710                               track->nbanks = 4;
2711                               break;
2712                     case 1:
2713                     default:
2714                               track->nbanks = 8;
2715                               break;
2716                     case 2:
2717                               track->nbanks = 16;
2718                               break;
2719                     }
2720 
2721                     switch ((tmp & 0xf00) >> 8) {
2722                     case 0:
2723                               track->group_size = 256;
2724                               break;
2725                     case 1:
2726                     default:
2727                               track->group_size = 512;
2728                               break;
2729                     }
2730 
2731                     switch ((tmp & 0xf000) >> 12) {
2732                     case 0:
2733                               track->row_size = 1;
2734                               break;
2735                     case 1:
2736                     default:
2737                               track->row_size = 2;
2738                               break;
2739                     case 2:
2740                               track->row_size = 4;
2741                               break;
2742                     }
2743 
2744                     p->track = track;
2745           }
2746           do {
2747                     r = radeon_cs_packet_parse(p, &pkt, p->idx);
2748                     if (r) {
2749                               kfree(p->track);
2750                               p->track = NULL;
2751                               return r;
2752                     }
2753                     p->idx += pkt.count + 2;
2754                     switch (pkt.type) {
2755                     case RADEON_PACKET_TYPE0:
2756                               r = evergreen_cs_parse_packet0(p, &pkt);
2757                               break;
2758                     case RADEON_PACKET_TYPE2:
2759                               break;
2760                     case RADEON_PACKET_TYPE3:
2761                               r = evergreen_packet3_check(p, &pkt);
2762                               break;
2763                     default:
2764                               DRM_ERROR("Unknown packet type %d !\n", pkt.type);
2765                               kfree(p->track);
2766                               p->track = NULL;
2767                               return -EINVAL;
2768                     }
2769                     if (r) {
2770                               kfree(p->track);
2771                               p->track = NULL;
2772                               return r;
2773                     }
2774           } while (p->idx < p->chunk_ib->length_dw);
2775 #if 0
2776           for (r = 0; r < p->ib.length_dw; r++) {
2777                     pr_info("%05d  0x%08X\n", r, p->ib.ptr[r]);
2778                     mdelay(1);
2779           }
2780 #endif
2781           kfree(p->track);
2782           p->track = NULL;
2783           return 0;
2784 }
2785 
2786 /**
2787  * evergreen_dma_cs_parse() - parse the DMA IB
2788  * @p:              parser structure holding parsing context.
2789  *
2790  * Parses the DMA IB from the CS ioctl and updates
2791  * the GPU addresses based on the reloc information and
2792  * checks for errors. (Evergreen-Cayman)
2793  * Returns 0 for success and an error on failure.
2794  **/
evergreen_dma_cs_parse(struct radeon_cs_parser * p)2795 int evergreen_dma_cs_parse(struct radeon_cs_parser *p)
2796 {
2797           struct radeon_cs_chunk *ib_chunk = p->chunk_ib;
2798           struct radeon_bo_list *src_reloc, *dst_reloc, *dst2_reloc;
2799           u32 header, cmd, count, sub_cmd;
2800           uint32_t *ib = p->ib.ptr;
2801           u32 idx;
2802           u64 src_offset, dst_offset, dst2_offset;
2803           int r;
2804 
2805           do {
2806                     if (p->idx >= ib_chunk->length_dw) {
2807                               DRM_ERROR("Can not parse packet at %d after CS end %d !\n",
2808                                           p->idx, ib_chunk->length_dw);
2809                               return -EINVAL;
2810                     }
2811                     idx = p->idx;
2812                     header = radeon_get_ib_value(p, idx);
2813                     cmd = GET_DMA_CMD(header);
2814                     count = GET_DMA_COUNT(header);
2815                     sub_cmd = GET_DMA_SUB_CMD(header);
2816 
2817                     switch (cmd) {
2818                     case DMA_PACKET_WRITE:
2819                               r = r600_dma_cs_next_reloc(p, &dst_reloc);
2820                               if (r) {
2821                                         DRM_ERROR("bad DMA_PACKET_WRITE\n");
2822                                         return -EINVAL;
2823                               }
2824                               switch (sub_cmd) {
2825                               /* tiled */
2826                               case 8:
2827                                         dst_offset = radeon_get_ib_value(p, idx+1);
2828                                         dst_offset <<= 8;
2829 
2830                                         ib[idx+1] += (u32)(dst_reloc->gpu_offset >> 8);
2831                                         p->idx += count + 7;
2832                                         break;
2833                               /* linear */
2834                               case 0:
2835                                         dst_offset = radeon_get_ib_value(p, idx+1);
2836                                         dst_offset |= ((u64)(radeon_get_ib_value(p, idx+2) & 0xff)) << 32;
2837 
2838                                         ib[idx+1] += (u32)(dst_reloc->gpu_offset & 0xfffffffc);
2839                                         ib[idx+2] += upper_32_bits(dst_reloc->gpu_offset) & 0xff;
2840                                         p->idx += count + 3;
2841                                         break;
2842                               default:
2843                                         DRM_ERROR("bad DMA_PACKET_WRITE [%6d] 0x%08x sub cmd is not 0 or 8\n", idx, header);
2844                                         return -EINVAL;
2845                               }
2846                               if ((dst_offset + (count * 4)) > radeon_bo_size(dst_reloc->robj)) {
2847                                         dev_warn(p->dev, "DMA write buffer too small (%llu %lu)\n",
2848                                                    dst_offset, radeon_bo_size(dst_reloc->robj));
2849                                         return -EINVAL;
2850                               }
2851                               break;
2852                     case DMA_PACKET_COPY:
2853                               r = r600_dma_cs_next_reloc(p, &src_reloc);
2854                               if (r) {
2855                                         DRM_ERROR("bad DMA_PACKET_COPY\n");
2856                                         return -EINVAL;
2857                               }
2858                               r = r600_dma_cs_next_reloc(p, &dst_reloc);
2859                               if (r) {
2860                                         DRM_ERROR("bad DMA_PACKET_COPY\n");
2861                                         return -EINVAL;
2862                               }
2863                               switch (sub_cmd) {
2864                               /* Copy L2L, DW aligned */
2865                               case 0x00:
2866                                         /* L2L, dw */
2867                                         src_offset = radeon_get_ib_value(p, idx+2);
2868                                         src_offset |= ((u64)(radeon_get_ib_value(p, idx+4) & 0xff)) << 32;
2869                                         dst_offset = radeon_get_ib_value(p, idx+1);
2870                                         dst_offset |= ((u64)(radeon_get_ib_value(p, idx+3) & 0xff)) << 32;
2871                                         if ((src_offset + (count * 4)) > radeon_bo_size(src_reloc->robj)) {
2872                                                   dev_warn(p->dev, "DMA L2L, dw src buffer too small (%llu %lu)\n",
2873                                                                       src_offset + (count * 4), radeon_bo_size(src_reloc->robj));
2874                                                   return -EINVAL;
2875                                         }
2876                                         if ((dst_offset + (count * 4)) > radeon_bo_size(dst_reloc->robj)) {
2877                                                   dev_warn(p->dev, "DMA L2L, dw dst buffer too small (%llu %lu)\n",
2878                                                                       dst_offset + (count * 4), radeon_bo_size(dst_reloc->robj));
2879                                                   return -EINVAL;
2880                                         }
2881                                         ib[idx+1] += (u32)(dst_reloc->gpu_offset & 0xfffffffc);
2882                                         ib[idx+2] += (u32)(src_reloc->gpu_offset & 0xfffffffc);
2883                                         ib[idx+3] += upper_32_bits(dst_reloc->gpu_offset) & 0xff;
2884                                         ib[idx+4] += upper_32_bits(src_reloc->gpu_offset) & 0xff;
2885                                         p->idx += 5;
2886                                         break;
2887                               /* Copy L2T/T2L */
2888                               case 0x08:
2889                                         /* detile bit */
2890                                         if (radeon_get_ib_value(p, idx + 2) & (1 << 31)) {
2891                                                   /* tiled src, linear dst */
2892                                                   src_offset = radeon_get_ib_value(p, idx+1);
2893                                                   src_offset <<= 8;
2894                                                   ib[idx+1] += (u32)(src_reloc->gpu_offset >> 8);
2895 
2896                                                   dst_offset = radeon_get_ib_value(p, idx + 7);
2897                                                   dst_offset |= ((u64)(radeon_get_ib_value(p, idx+8) & 0xff)) << 32;
2898                                                   ib[idx+7] += (u32)(dst_reloc->gpu_offset & 0xfffffffc);
2899                                                   ib[idx+8] += upper_32_bits(dst_reloc->gpu_offset) & 0xff;
2900                                         } else {
2901                                                   /* linear src, tiled dst */
2902                                                   src_offset = radeon_get_ib_value(p, idx+7);
2903                                                   src_offset |= ((u64)(radeon_get_ib_value(p, idx+8) & 0xff)) << 32;
2904                                                   ib[idx+7] += (u32)(src_reloc->gpu_offset & 0xfffffffc);
2905                                                   ib[idx+8] += upper_32_bits(src_reloc->gpu_offset) & 0xff;
2906 
2907                                                   dst_offset = radeon_get_ib_value(p, idx+1);
2908                                                   dst_offset <<= 8;
2909                                                   ib[idx+1] += (u32)(dst_reloc->gpu_offset >> 8);
2910                                         }
2911                                         if ((src_offset + (count * 4)) > radeon_bo_size(src_reloc->robj)) {
2912                                                   dev_warn(p->dev, "DMA L2T, src buffer too small (%llu %lu)\n",
2913                                                                       src_offset + (count * 4), radeon_bo_size(src_reloc->robj));
2914                                                   return -EINVAL;
2915                                         }
2916                                         if ((dst_offset + (count * 4)) > radeon_bo_size(dst_reloc->robj)) {
2917                                                   dev_warn(p->dev, "DMA L2T, dst buffer too small (%llu %lu)\n",
2918                                                                       dst_offset + (count * 4), radeon_bo_size(dst_reloc->robj));
2919                                                   return -EINVAL;
2920                                         }
2921                                         p->idx += 9;
2922                                         break;
2923                               /* Copy L2L, byte aligned */
2924                               case 0x40:
2925                                         /* L2L, byte */
2926                                         src_offset = radeon_get_ib_value(p, idx+2);
2927                                         src_offset |= ((u64)(radeon_get_ib_value(p, idx+4) & 0xff)) << 32;
2928                                         dst_offset = radeon_get_ib_value(p, idx+1);
2929                                         dst_offset |= ((u64)(radeon_get_ib_value(p, idx+3) & 0xff)) << 32;
2930                                         if ((src_offset + count) > radeon_bo_size(src_reloc->robj)) {
2931                                                   dev_warn(p->dev, "DMA L2L, byte src buffer too small (%llu %lu)\n",
2932                                                                       src_offset + count, radeon_bo_size(src_reloc->robj));
2933                                                   return -EINVAL;
2934                                         }
2935                                         if ((dst_offset + count) > radeon_bo_size(dst_reloc->robj)) {
2936                                                   dev_warn(p->dev, "DMA L2L, byte dst buffer too small (%llu %lu)\n",
2937                                                                       dst_offset + count, radeon_bo_size(dst_reloc->robj));
2938                                                   return -EINVAL;
2939                                         }
2940                                         ib[idx+1] += (u32)(dst_reloc->gpu_offset & 0xffffffff);
2941                                         ib[idx+2] += (u32)(src_reloc->gpu_offset & 0xffffffff);
2942                                         ib[idx+3] += upper_32_bits(dst_reloc->gpu_offset) & 0xff;
2943                                         ib[idx+4] += upper_32_bits(src_reloc->gpu_offset) & 0xff;
2944                                         p->idx += 5;
2945                                         break;
2946                               /* Copy L2L, partial */
2947                               case 0x41:
2948                                         /* L2L, partial */
2949                                         if (p->family < CHIP_CAYMAN) {
2950                                                   DRM_ERROR("L2L Partial is cayman only !\n");
2951                                                   return -EINVAL;
2952                                         }
2953                                         ib[idx+1] += (u32)(src_reloc->gpu_offset & 0xffffffff);
2954                                         ib[idx+2] += upper_32_bits(src_reloc->gpu_offset) & 0xff;
2955                                         ib[idx+4] += (u32)(dst_reloc->gpu_offset & 0xffffffff);
2956                                         ib[idx+5] += upper_32_bits(dst_reloc->gpu_offset) & 0xff;
2957 
2958                                         p->idx += 9;
2959                                         break;
2960                               /* Copy L2L, DW aligned, broadcast */
2961                               case 0x44:
2962                                         /* L2L, dw, broadcast */
2963                                         r = r600_dma_cs_next_reloc(p, &dst2_reloc);
2964                                         if (r) {
2965                                                   DRM_ERROR("bad L2L, dw, broadcast DMA_PACKET_COPY\n");
2966                                                   return -EINVAL;
2967                                         }
2968                                         dst_offset = radeon_get_ib_value(p, idx+1);
2969                                         dst_offset |= ((u64)(radeon_get_ib_value(p, idx+4) & 0xff)) << 32;
2970                                         dst2_offset = radeon_get_ib_value(p, idx+2);
2971                                         dst2_offset |= ((u64)(radeon_get_ib_value(p, idx+5) & 0xff)) << 32;
2972                                         src_offset = radeon_get_ib_value(p, idx+3);
2973                                         src_offset |= ((u64)(radeon_get_ib_value(p, idx+6) & 0xff)) << 32;
2974                                         if ((src_offset + (count * 4)) > radeon_bo_size(src_reloc->robj)) {
2975                                                   dev_warn(p->dev, "DMA L2L, dw, broadcast src buffer too small (%llu %lu)\n",
2976                                                                       src_offset + (count * 4), radeon_bo_size(src_reloc->robj));
2977                                                   return -EINVAL;
2978                                         }
2979                                         if ((dst_offset + (count * 4)) > radeon_bo_size(dst_reloc->robj)) {
2980                                                   dev_warn(p->dev, "DMA L2L, dw, broadcast dst buffer too small (%llu %lu)\n",
2981                                                                       dst_offset + (count * 4), radeon_bo_size(dst_reloc->robj));
2982                                                   return -EINVAL;
2983                                         }
2984                                         if ((dst2_offset + (count * 4)) > radeon_bo_size(dst2_reloc->robj)) {
2985                                                   dev_warn(p->dev, "DMA L2L, dw, broadcast dst2 buffer too small (%llu %lu)\n",
2986                                                                       dst2_offset + (count * 4), radeon_bo_size(dst2_reloc->robj));
2987                                                   return -EINVAL;
2988                                         }
2989                                         ib[idx+1] += (u32)(dst_reloc->gpu_offset & 0xfffffffc);
2990                                         ib[idx+2] += (u32)(dst2_reloc->gpu_offset & 0xfffffffc);
2991                                         ib[idx+3] += (u32)(src_reloc->gpu_offset & 0xfffffffc);
2992                                         ib[idx+4] += upper_32_bits(dst_reloc->gpu_offset) & 0xff;
2993                                         ib[idx+5] += upper_32_bits(dst2_reloc->gpu_offset) & 0xff;
2994                                         ib[idx+6] += upper_32_bits(src_reloc->gpu_offset) & 0xff;
2995                                         p->idx += 7;
2996                                         break;
2997                               /* Copy L2T Frame to Field */
2998                               case 0x48:
2999                                         if (radeon_get_ib_value(p, idx + 2) & (1 << 31)) {
3000                                                   DRM_ERROR("bad L2T, frame to fields DMA_PACKET_COPY\n");
3001                                                   return -EINVAL;
3002                                         }
3003                                         r = r600_dma_cs_next_reloc(p, &dst2_reloc);
3004                                         if (r) {
3005                                                   DRM_ERROR("bad L2T, frame to fields DMA_PACKET_COPY\n");
3006                                                   return -EINVAL;
3007                                         }
3008                                         dst_offset = radeon_get_ib_value(p, idx+1);
3009                                         dst_offset <<= 8;
3010                                         dst2_offset = radeon_get_ib_value(p, idx+2);
3011                                         dst2_offset <<= 8;
3012                                         src_offset = radeon_get_ib_value(p, idx+8);
3013                                         src_offset |= ((u64)(radeon_get_ib_value(p, idx+9) & 0xff)) << 32;
3014                                         if ((src_offset + (count * 4)) > radeon_bo_size(src_reloc->robj)) {
3015                                                   dev_warn(p->dev, "DMA L2T, frame to fields src buffer too small (%llu %lu)\n",
3016                                                                       src_offset + (count * 4), radeon_bo_size(src_reloc->robj));
3017                                                   return -EINVAL;
3018                                         }
3019                                         if ((dst_offset + (count * 4)) > radeon_bo_size(dst_reloc->robj)) {
3020                                                   dev_warn(p->dev, "DMA L2T, frame to fields buffer too small (%llu %lu)\n",
3021                                                                       dst_offset + (count * 4), radeon_bo_size(dst_reloc->robj));
3022                                                   return -EINVAL;
3023                                         }
3024                                         if ((dst2_offset + (count * 4)) > radeon_bo_size(dst2_reloc->robj)) {
3025                                                   dev_warn(p->dev, "DMA L2T, frame to fields buffer too small (%llu %lu)\n",
3026                                                                       dst2_offset + (count * 4), radeon_bo_size(dst2_reloc->robj));
3027                                                   return -EINVAL;
3028                                         }
3029                                         ib[idx+1] += (u32)(dst_reloc->gpu_offset >> 8);
3030                                         ib[idx+2] += (u32)(dst2_reloc->gpu_offset >> 8);
3031                                         ib[idx+8] += (u32)(src_reloc->gpu_offset & 0xfffffffc);
3032                                         ib[idx+9] += upper_32_bits(src_reloc->gpu_offset) & 0xff;
3033                                         p->idx += 10;
3034                                         break;
3035                               /* Copy L2T/T2L, partial */
3036                               case 0x49:
3037                                         /* L2T, T2L partial */
3038                                         if (p->family < CHIP_CAYMAN) {
3039                                                   DRM_ERROR("L2T, T2L Partial is cayman only !\n");
3040                                                   return -EINVAL;
3041                                         }
3042                                         /* detile bit */
3043                                         if (radeon_get_ib_value(p, idx + 2) & (1 << 31)) {
3044                                                   /* tiled src, linear dst */
3045                                                   ib[idx+1] += (u32)(src_reloc->gpu_offset >> 8);
3046 
3047                                                   ib[idx+7] += (u32)(dst_reloc->gpu_offset & 0xfffffffc);
3048                                                   ib[idx+8] += upper_32_bits(dst_reloc->gpu_offset) & 0xff;
3049                                         } else {
3050                                                   /* linear src, tiled dst */
3051                                                   ib[idx+7] += (u32)(src_reloc->gpu_offset & 0xfffffffc);
3052                                                   ib[idx+8] += upper_32_bits(src_reloc->gpu_offset) & 0xff;
3053 
3054                                                   ib[idx+1] += (u32)(dst_reloc->gpu_offset >> 8);
3055                                         }
3056                                         p->idx += 12;
3057                                         break;
3058                               /* Copy L2T broadcast */
3059                               case 0x4b:
3060                                         /* L2T, broadcast */
3061                                         if (radeon_get_ib_value(p, idx + 2) & (1 << 31)) {
3062                                                   DRM_ERROR("bad L2T, broadcast DMA_PACKET_COPY\n");
3063                                                   return -EINVAL;
3064                                         }
3065                                         r = r600_dma_cs_next_reloc(p, &dst2_reloc);
3066                                         if (r) {
3067                                                   DRM_ERROR("bad L2T, broadcast DMA_PACKET_COPY\n");
3068                                                   return -EINVAL;
3069                                         }
3070                                         dst_offset = radeon_get_ib_value(p, idx+1);
3071                                         dst_offset <<= 8;
3072                                         dst2_offset = radeon_get_ib_value(p, idx+2);
3073                                         dst2_offset <<= 8;
3074                                         src_offset = radeon_get_ib_value(p, idx+8);
3075                                         src_offset |= ((u64)(radeon_get_ib_value(p, idx+9) & 0xff)) << 32;
3076                                         if ((src_offset + (count * 4)) > radeon_bo_size(src_reloc->robj)) {
3077                                                   dev_warn(p->dev, "DMA L2T, broadcast src buffer too small (%llu %lu)\n",
3078                                                                       src_offset + (count * 4), radeon_bo_size(src_reloc->robj));
3079                                                   return -EINVAL;
3080                                         }
3081                                         if ((dst_offset + (count * 4)) > radeon_bo_size(dst_reloc->robj)) {
3082                                                   dev_warn(p->dev, "DMA L2T, broadcast dst buffer too small (%llu %lu)\n",
3083                                                                       dst_offset + (count * 4), radeon_bo_size(dst_reloc->robj));
3084                                                   return -EINVAL;
3085                                         }
3086                                         if ((dst2_offset + (count * 4)) > radeon_bo_size(dst2_reloc->robj)) {
3087                                                   dev_warn(p->dev, "DMA L2T, broadcast dst2 buffer too small (%llu %lu)\n",
3088                                                                       dst2_offset + (count * 4), radeon_bo_size(dst2_reloc->robj));
3089                                                   return -EINVAL;
3090                                         }
3091                                         ib[idx+1] += (u32)(dst_reloc->gpu_offset >> 8);
3092                                         ib[idx+2] += (u32)(dst2_reloc->gpu_offset >> 8);
3093                                         ib[idx+8] += (u32)(src_reloc->gpu_offset & 0xfffffffc);
3094                                         ib[idx+9] += upper_32_bits(src_reloc->gpu_offset) & 0xff;
3095                                         p->idx += 10;
3096                                         break;
3097                               /* Copy L2T/T2L (tile units) */
3098                               case 0x4c:
3099                                         /* L2T, T2L */
3100                                         /* detile bit */
3101                                         if (radeon_get_ib_value(p, idx + 2) & (1 << 31)) {
3102                                                   /* tiled src, linear dst */
3103                                                   src_offset = radeon_get_ib_value(p, idx+1);
3104                                                   src_offset <<= 8;
3105                                                   ib[idx+1] += (u32)(src_reloc->gpu_offset >> 8);
3106 
3107                                                   dst_offset = radeon_get_ib_value(p, idx+7);
3108                                                   dst_offset |= ((u64)(radeon_get_ib_value(p, idx+8) & 0xff)) << 32;
3109                                                   ib[idx+7] += (u32)(dst_reloc->gpu_offset & 0xfffffffc);
3110                                                   ib[idx+8] += upper_32_bits(dst_reloc->gpu_offset) & 0xff;
3111                                         } else {
3112                                                   /* linear src, tiled dst */
3113                                                   src_offset = radeon_get_ib_value(p, idx+7);
3114                                                   src_offset |= ((u64)(radeon_get_ib_value(p, idx+8) & 0xff)) << 32;
3115                                                   ib[idx+7] += (u32)(src_reloc->gpu_offset & 0xfffffffc);
3116                                                   ib[idx+8] += upper_32_bits(src_reloc->gpu_offset) & 0xff;
3117 
3118                                                   dst_offset = radeon_get_ib_value(p, idx+1);
3119                                                   dst_offset <<= 8;
3120                                                   ib[idx+1] += (u32)(dst_reloc->gpu_offset >> 8);
3121                                         }
3122                                         if ((src_offset + (count * 4)) > radeon_bo_size(src_reloc->robj)) {
3123                                                   dev_warn(p->dev, "DMA L2T, T2L src buffer too small (%llu %lu)\n",
3124                                                                       src_offset + (count * 4), radeon_bo_size(src_reloc->robj));
3125                                                   return -EINVAL;
3126                                         }
3127                                         if ((dst_offset + (count * 4)) > radeon_bo_size(dst_reloc->robj)) {
3128                                                   dev_warn(p->dev, "DMA L2T, T2L dst buffer too small (%llu %lu)\n",
3129                                                                       dst_offset + (count * 4), radeon_bo_size(dst_reloc->robj));
3130                                                   return -EINVAL;
3131                                         }
3132                                         p->idx += 9;
3133                                         break;
3134                               /* Copy T2T, partial (tile units) */
3135                               case 0x4d:
3136                                         /* T2T partial */
3137                                         if (p->family < CHIP_CAYMAN) {
3138                                                   DRM_ERROR("L2T, T2L Partial is cayman only !\n");
3139                                                   return -EINVAL;
3140                                         }
3141                                         ib[idx+1] += (u32)(src_reloc->gpu_offset >> 8);
3142                                         ib[idx+4] += (u32)(dst_reloc->gpu_offset >> 8);
3143                                         p->idx += 13;
3144                                         break;
3145                               /* Copy L2T broadcast (tile units) */
3146                               case 0x4f:
3147                                         /* L2T, broadcast */
3148                                         if (radeon_get_ib_value(p, idx + 2) & (1 << 31)) {
3149                                                   DRM_ERROR("bad L2T, broadcast DMA_PACKET_COPY\n");
3150                                                   return -EINVAL;
3151                                         }
3152                                         r = r600_dma_cs_next_reloc(p, &dst2_reloc);
3153                                         if (r) {
3154                                                   DRM_ERROR("bad L2T, broadcast DMA_PACKET_COPY\n");
3155                                                   return -EINVAL;
3156                                         }
3157                                         dst_offset = radeon_get_ib_value(p, idx+1);
3158                                         dst_offset <<= 8;
3159                                         dst2_offset = radeon_get_ib_value(p, idx+2);
3160                                         dst2_offset <<= 8;
3161                                         src_offset = radeon_get_ib_value(p, idx+8);
3162                                         src_offset |= ((u64)(radeon_get_ib_value(p, idx+9) & 0xff)) << 32;
3163                                         if ((src_offset + (count * 4)) > radeon_bo_size(src_reloc->robj)) {
3164                                                   dev_warn(p->dev, "DMA L2T, broadcast src buffer too small (%llu %lu)\n",
3165                                                                       src_offset + (count * 4), radeon_bo_size(src_reloc->robj));
3166                                                   return -EINVAL;
3167                                         }
3168                                         if ((dst_offset + (count * 4)) > radeon_bo_size(dst_reloc->robj)) {
3169                                                   dev_warn(p->dev, "DMA L2T, broadcast dst buffer too small (%llu %lu)\n",
3170                                                                       dst_offset + (count * 4), radeon_bo_size(dst_reloc->robj));
3171                                                   return -EINVAL;
3172                                         }
3173                                         if ((dst2_offset + (count * 4)) > radeon_bo_size(dst2_reloc->robj)) {
3174                                                   dev_warn(p->dev, "DMA L2T, broadcast dst2 buffer too small (%llu %lu)\n",
3175                                                                       dst2_offset + (count * 4), radeon_bo_size(dst2_reloc->robj));
3176                                                   return -EINVAL;
3177                                         }
3178                                         ib[idx+1] += (u32)(dst_reloc->gpu_offset >> 8);
3179                                         ib[idx+2] += (u32)(dst2_reloc->gpu_offset >> 8);
3180                                         ib[idx+8] += (u32)(src_reloc->gpu_offset & 0xfffffffc);
3181                                         ib[idx+9] += upper_32_bits(src_reloc->gpu_offset) & 0xff;
3182                                         p->idx += 10;
3183                                         break;
3184                               default:
3185                                         DRM_ERROR("bad DMA_PACKET_COPY [%6d] 0x%08x invalid sub cmd\n", idx, header);
3186                                         return -EINVAL;
3187                               }
3188                               break;
3189                     case DMA_PACKET_CONSTANT_FILL:
3190                               r = r600_dma_cs_next_reloc(p, &dst_reloc);
3191                               if (r) {
3192                                         DRM_ERROR("bad DMA_PACKET_CONSTANT_FILL\n");
3193                                         return -EINVAL;
3194                               }
3195                               dst_offset = radeon_get_ib_value(p, idx+1);
3196                               dst_offset |= ((u64)(radeon_get_ib_value(p, idx+3) & 0x00ff0000)) << 16;
3197                               if ((dst_offset + (count * 4)) > radeon_bo_size(dst_reloc->robj)) {
3198                                         dev_warn(p->dev, "DMA constant fill buffer too small (%llu %lu)\n",
3199                                                    dst_offset, radeon_bo_size(dst_reloc->robj));
3200                                         return -EINVAL;
3201                               }
3202                               ib[idx+1] += (u32)(dst_reloc->gpu_offset & 0xfffffffc);
3203                               ib[idx+3] += (upper_32_bits(dst_reloc->gpu_offset) << 16) & 0x00ff0000;
3204                               p->idx += 4;
3205                               break;
3206                     case DMA_PACKET_NOP:
3207                               p->idx += 1;
3208                               break;
3209                     default:
3210                               DRM_ERROR("Unknown packet type %d at %d !\n", cmd, idx);
3211                               return -EINVAL;
3212                     }
3213           } while (p->idx < p->chunk_ib->length_dw);
3214 #if 0
3215           for (r = 0; r < p->ib->length_dw; r++) {
3216                     pr_info("%05d  0x%08X\n", r, p->ib.ptr[r]);
3217                     mdelay(1);
3218           }
3219 #endif
3220           return 0;
3221 }
3222 
3223 /* vm parser */
evergreen_vm_reg_valid(u32 reg)3224 static bool evergreen_vm_reg_valid(u32 reg)
3225 {
3226           /* context regs are fine */
3227           if (reg >= 0x28000)
3228                     return true;
3229 
3230           /* check config regs */
3231           switch (reg) {
3232           case WAIT_UNTIL:
3233           case GRBM_GFX_INDEX:
3234           case CP_STRMOUT_CNTL:
3235           case CP_COHER_CNTL:
3236           case CP_COHER_SIZE:
3237           case VGT_VTX_VECT_EJECT_REG:
3238           case VGT_CACHE_INVALIDATION:
3239           case VGT_GS_VERTEX_REUSE:
3240           case VGT_PRIMITIVE_TYPE:
3241           case VGT_INDEX_TYPE:
3242           case VGT_NUM_INDICES:
3243           case VGT_NUM_INSTANCES:
3244           case VGT_COMPUTE_DIM_X:
3245           case VGT_COMPUTE_DIM_Y:
3246           case VGT_COMPUTE_DIM_Z:
3247           case VGT_COMPUTE_START_X:
3248           case VGT_COMPUTE_START_Y:
3249           case VGT_COMPUTE_START_Z:
3250           case VGT_COMPUTE_INDEX:
3251           case VGT_COMPUTE_THREAD_GROUP_SIZE:
3252           case VGT_HS_OFFCHIP_PARAM:
3253           case PA_CL_ENHANCE:
3254           case PA_SU_LINE_STIPPLE_VALUE:
3255           case PA_SC_LINE_STIPPLE_STATE:
3256           case PA_SC_ENHANCE:
3257           case SQ_DYN_GPR_CNTL_PS_FLUSH_REQ:
3258           case SQ_DYN_GPR_SIMD_LOCK_EN:
3259           case SQ_CONFIG:
3260           case SQ_GPR_RESOURCE_MGMT_1:
3261           case SQ_GLOBAL_GPR_RESOURCE_MGMT_1:
3262           case SQ_GLOBAL_GPR_RESOURCE_MGMT_2:
3263           case SQ_CONST_MEM_BASE:
3264           case SQ_STATIC_THREAD_MGMT_1:
3265           case SQ_STATIC_THREAD_MGMT_2:
3266           case SQ_STATIC_THREAD_MGMT_3:
3267           case SPI_CONFIG_CNTL:
3268           case SPI_CONFIG_CNTL_1:
3269           case TA_CNTL_AUX:
3270           case DB_DEBUG:
3271           case DB_DEBUG2:
3272           case DB_DEBUG3:
3273           case DB_DEBUG4:
3274           case DB_WATERMARKS:
3275           case TD_PS_BORDER_COLOR_INDEX:
3276           case TD_PS_BORDER_COLOR_RED:
3277           case TD_PS_BORDER_COLOR_GREEN:
3278           case TD_PS_BORDER_COLOR_BLUE:
3279           case TD_PS_BORDER_COLOR_ALPHA:
3280           case TD_VS_BORDER_COLOR_INDEX:
3281           case TD_VS_BORDER_COLOR_RED:
3282           case TD_VS_BORDER_COLOR_GREEN:
3283           case TD_VS_BORDER_COLOR_BLUE:
3284           case TD_VS_BORDER_COLOR_ALPHA:
3285           case TD_GS_BORDER_COLOR_INDEX:
3286           case TD_GS_BORDER_COLOR_RED:
3287           case TD_GS_BORDER_COLOR_GREEN:
3288           case TD_GS_BORDER_COLOR_BLUE:
3289           case TD_GS_BORDER_COLOR_ALPHA:
3290           case TD_HS_BORDER_COLOR_INDEX:
3291           case TD_HS_BORDER_COLOR_RED:
3292           case TD_HS_BORDER_COLOR_GREEN:
3293           case TD_HS_BORDER_COLOR_BLUE:
3294           case TD_HS_BORDER_COLOR_ALPHA:
3295           case TD_LS_BORDER_COLOR_INDEX:
3296           case TD_LS_BORDER_COLOR_RED:
3297           case TD_LS_BORDER_COLOR_GREEN:
3298           case TD_LS_BORDER_COLOR_BLUE:
3299           case TD_LS_BORDER_COLOR_ALPHA:
3300           case TD_CS_BORDER_COLOR_INDEX:
3301           case TD_CS_BORDER_COLOR_RED:
3302           case TD_CS_BORDER_COLOR_GREEN:
3303           case TD_CS_BORDER_COLOR_BLUE:
3304           case TD_CS_BORDER_COLOR_ALPHA:
3305           case SQ_ESGS_RING_SIZE:
3306           case SQ_GSVS_RING_SIZE:
3307           case SQ_ESTMP_RING_SIZE:
3308           case SQ_GSTMP_RING_SIZE:
3309           case SQ_HSTMP_RING_SIZE:
3310           case SQ_LSTMP_RING_SIZE:
3311           case SQ_PSTMP_RING_SIZE:
3312           case SQ_VSTMP_RING_SIZE:
3313           case SQ_ESGS_RING_ITEMSIZE:
3314           case SQ_ESTMP_RING_ITEMSIZE:
3315           case SQ_GSTMP_RING_ITEMSIZE:
3316           case SQ_GSVS_RING_ITEMSIZE:
3317           case SQ_GS_VERT_ITEMSIZE:
3318           case SQ_GS_VERT_ITEMSIZE_1:
3319           case SQ_GS_VERT_ITEMSIZE_2:
3320           case SQ_GS_VERT_ITEMSIZE_3:
3321           case SQ_GSVS_RING_OFFSET_1:
3322           case SQ_GSVS_RING_OFFSET_2:
3323           case SQ_GSVS_RING_OFFSET_3:
3324           case SQ_HSTMP_RING_ITEMSIZE:
3325           case SQ_LSTMP_RING_ITEMSIZE:
3326           case SQ_PSTMP_RING_ITEMSIZE:
3327           case SQ_VSTMP_RING_ITEMSIZE:
3328           case VGT_TF_RING_SIZE:
3329           case SQ_ESGS_RING_BASE:
3330           case SQ_GSVS_RING_BASE:
3331           case SQ_ESTMP_RING_BASE:
3332           case SQ_GSTMP_RING_BASE:
3333           case SQ_HSTMP_RING_BASE:
3334           case SQ_LSTMP_RING_BASE:
3335           case SQ_PSTMP_RING_BASE:
3336           case SQ_VSTMP_RING_BASE:
3337           case CAYMAN_VGT_OFFCHIP_LDS_BASE:
3338           case CAYMAN_SQ_EX_ALLOC_TABLE_SLOTS:
3339                     return true;
3340           default:
3341                     DRM_ERROR("Invalid register 0x%x in CS\n", reg);
3342                     return false;
3343           }
3344 }
3345 
/**
 * evergreen_vm_packet3_check() - validate one type-3 packet of a VM IB
 * @rdev: radeon_device pointer (not used by the checks below)
 * @ib:   dword array holding the indirect buffer
 * @pkt:  decoded packet header; @pkt->idx is the index of the header in @ib
 *        and the packet body is the @pkt->count + 1 dwords that follow it
 *
 * Opcodes on the pass-through list are accepted without inspection.
 * Opcodes that can name a register (COND_WRITE, COPY_DW, SET_CONFIG_REG,
 * CP_DMA) have every such register run through evergreen_vm_reg_valid().
 * SET_APPEND_CNT is limited to the GDS append counter registers.  Opcodes
 * that are not listed at all are rejected.
 *
 * A body dword at a fixed offset is only read after checking that
 * @pkt->count says the packet really contains it; otherwise a short packet
 * would be validated against data that belongs to whatever follows it.
 *
 * Returns 0 if the packet is acceptable and -EINVAL otherwise.
 */
static int evergreen_vm_packet3_check(struct radeon_device *rdev,
				      u32 *ib, struct radeon_cs_packet *pkt)
{
	/* a type-3 packet always has at least one body dword, read it now */
	u32 idx = pkt->idx + 1;
	u32 idx_value = ib[idx];
	u32 start_reg, end_reg, reg, i;
	u32 command, info;
	u32 src_sel, dst_sel, dma_count;

	switch (pkt->opcode) {
	case PACKET3_NOP:
		break;
	case PACKET3_SET_BASE:
		if (idx_value != 1) {
			DRM_ERROR("bad SET_BASE\n");
			return -EINVAL;
		}
		break;
	/* packets that are accepted without looking at their payload */
	case PACKET3_CLEAR_STATE:
	case PACKET3_INDEX_BUFFER_SIZE:
	case PACKET3_DISPATCH_DIRECT:
	case PACKET3_DISPATCH_INDIRECT:
	case PACKET3_MODE_CONTROL:
	case PACKET3_SET_PREDICATION:
	case PACKET3_COND_EXEC:
	case PACKET3_PRED_EXEC:
	case PACKET3_DRAW_INDIRECT:
	case PACKET3_DRAW_INDEX_INDIRECT:
	case PACKET3_INDEX_BASE:
	case PACKET3_DRAW_INDEX_2:
	case PACKET3_CONTEXT_CONTROL:
	case PACKET3_DRAW_INDEX_OFFSET:
	case PACKET3_INDEX_TYPE:
	case PACKET3_DRAW_INDEX:
	case PACKET3_DRAW_INDEX_AUTO:
	case PACKET3_DRAW_INDEX_IMMD:
	case PACKET3_NUM_INSTANCES:
	case PACKET3_DRAW_INDEX_MULTI_AUTO:
	case PACKET3_STRMOUT_BUFFER_UPDATE:
	case PACKET3_DRAW_INDEX_OFFSET_2:
	case PACKET3_DRAW_INDEX_MULTI_ELEMENT:
	case PACKET3_MPEG_INDEX:
	case PACKET3_WAIT_REG_MEM:
	case PACKET3_MEM_WRITE:
	case PACKET3_PFP_SYNC_ME:
	case PACKET3_SURFACE_SYNC:
	case PACKET3_EVENT_WRITE:
	case PACKET3_EVENT_WRITE_EOP:
	case PACKET3_EVENT_WRITE_EOS:
	case PACKET3_SET_CONTEXT_REG:
	case PACKET3_SET_BOOL_CONST:
	case PACKET3_SET_LOOP_CONST:
	case PACKET3_SET_RESOURCE:
	case PACKET3_SET_SAMPLER:
	case PACKET3_SET_CTL_CONST:
	case PACKET3_SET_RESOURCE_OFFSET:
	case PACKET3_SET_CONTEXT_REG_INDIRECT:
	case PACKET3_SET_RESOURCE_INDIRECT:
	case CAYMAN_PACKET3_DEALLOC_STATE:
		break;
	case PACKET3_COND_WRITE:
		/* bit 8 set: body dword 5 holds a register (dword) offset */
		if (idx_value & 0x100) {
			if (pkt->count < 5) {
				DRM_ERROR("bad COND_WRITE (invalid count)\n");
				return -EINVAL;
			}
			reg = ib[idx + 5] * 4;
			if (!evergreen_vm_reg_valid(reg))
				return -EINVAL;
		}
		break;
	case PACKET3_COPY_DW:
		/* bit 1 set: body dword 3 holds a register (dword) offset */
		if (idx_value & 0x2) {
			if (pkt->count < 3) {
				DRM_ERROR("bad COPY_DW (invalid count)\n");
				return -EINVAL;
			}
			reg = ib[idx + 3] * 4;
			if (!evergreen_vm_reg_valid(reg))
				return -EINVAL;
		}
		break;
	case PACKET3_SET_CONFIG_REG:
		/* first body dword is a dword offset into the config range */
		start_reg = (idx_value << 2) + PACKET3_SET_CONFIG_REG_START;
		end_reg = 4 * pkt->count + start_reg - 4;
		if ((start_reg < PACKET3_SET_CONFIG_REG_START) ||
		    (start_reg >= PACKET3_SET_CONFIG_REG_END) ||
		    (end_reg >= PACKET3_SET_CONFIG_REG_END)) {
			DRM_ERROR("bad PACKET3_SET_CONFIG_REG\n");
			return -EINVAL;
		}
		/* one register per remaining body dword */
		for (i = 0; i < pkt->count; i++) {
			reg = start_reg + (4 * i);
			if (!evergreen_vm_reg_valid(reg))
				return -EINVAL;
		}
		break;
	case PACKET3_CP_DMA:
		/* body dwords 1, 2 and 4 are read below */
		if (pkt->count < 4) {
			DRM_ERROR("bad CP DMA (invalid count)\n");
			return -EINVAL;
		}
		command = ib[idx + 4];
		info = ib[idx + 1];
		src_sel = (info & 0x60000000) >> 29;
		dst_sel = (info & 0x00300000) >> 20;
		dma_count = command & 0x1fffff;
		if ((src_sel != 0) || /* src = GDS or DATA */
		    (dst_sel != 0) || /* dst = GDS */
		    ((dst_sel == 0) &&
		     (command & PACKET3_CP_DMA_CMD_DAS)) || /* dst = register */
		    ((src_sel == 0) &&
		     (command & PACKET3_CP_DMA_CMD_SAS))) { /* src = register */
			/* non mem to mem copies requires dw aligned count */
			if (dma_count % 4) {
				DRM_ERROR("CP DMA command requires dw count alignment\n");
				return -EINVAL;
			}
		}
		/*
		 * NOTE(review): both register walks below iterate dma_count
		 * times while advancing one register (4 bytes) per step, so
		 * they look at more registers than dma_count / 4.  That only
		 * makes the check stricter; confirm it is intended.
		 */
		if ((command & PACKET3_CP_DMA_CMD_SAS) && (src_sel == 0)) {
			/* src address space is register */
			start_reg = idx_value << 2;
			if (command & PACKET3_CP_DMA_CMD_SAIC) {
				/* SAIC set: only the start register is checked */
				reg = start_reg;
				if (!evergreen_vm_reg_valid(reg)) {
					DRM_ERROR("CP DMA Bad SRC register\n");
					return -EINVAL;
				}
			} else {
				for (i = 0; i < dma_count; i++) {
					reg = start_reg + (4 * i);
					if (!evergreen_vm_reg_valid(reg)) {
						DRM_ERROR("CP DMA Bad SRC register\n");
						return -EINVAL;
					}
				}
			}
		}
		if ((command & PACKET3_CP_DMA_CMD_DAS) && (dst_sel == 0)) {
			/*
			 * dst address space is register
			 *
			 * NOTE(review): unlike the SRC path this dword is
			 * used without the << 2 scaling; confirm against the
			 * CP_DMA packet layout.
			 */
			start_reg = ib[idx + 2];
			if (command & PACKET3_CP_DMA_CMD_DAIC) {
				/* DAIC set: only the start register is checked */
				reg = start_reg;
				if (!evergreen_vm_reg_valid(reg)) {
					DRM_ERROR("CP DMA Bad DST register\n");
					return -EINVAL;
				}
			} else {
				for (i = 0; i < dma_count; i++) {
					reg = start_reg + (4 * i);
					if (!evergreen_vm_reg_valid(reg)) {
						DRM_ERROR("CP DMA Bad DST register\n");
						return -EINVAL;
					}
				}
			}
		}
		break;
	case PACKET3_SET_APPEND_CNT: {
		uint32_t areg;
		uint32_t allowed_reg_base;

		if (pkt->count != 2) {
			DRM_ERROR("bad SET_APPEND_CNT (invalid count)\n");
			return -EINVAL;
		}

		/* dword index of GDS_APPEND_COUNT_0 within the context range */
		allowed_reg_base = GDS_APPEND_COUNT_0;
		allowed_reg_base -= PACKET3_SET_CONTEXT_REG_START;
		allowed_reg_base >>= 2;

		/* target register index is in the upper 16 bits; 12 are allowed */
		areg = idx_value >> 16;
		if (areg < allowed_reg_base || areg > (allowed_reg_base + 11)) {
			DRM_ERROR("forbidden register for append cnt 0x%08x at %d\n",
				  areg, idx);
			return -EINVAL;
		}
		break;
	}
	default:
		/* anything not listed above is refused */
		return -EINVAL;
	}
	return 0;
}
3518 
evergreen_ib_parse(struct radeon_device * rdev,struct radeon_ib * ib)3519 int evergreen_ib_parse(struct radeon_device *rdev, struct radeon_ib *ib)
3520 {
3521           int ret = 0;
3522           u32 idx = 0;
3523           struct radeon_cs_packet pkt;
3524 
3525           do {
3526                     pkt.idx = idx;
3527                     pkt.type = RADEON_CP_PACKET_GET_TYPE(ib->ptr[idx]);
3528                     pkt.count = RADEON_CP_PACKET_GET_COUNT(ib->ptr[idx]);
3529                     pkt.one_reg_wr = 0;
3530                     switch (pkt.type) {
3531                     case RADEON_PACKET_TYPE0:
3532                               dev_err(rdev->dev, "Packet0 not allowed!\n");
3533                               ret = -EINVAL;
3534                               break;
3535                     case RADEON_PACKET_TYPE2:
3536                               idx += 1;
3537                               break;
3538                     case RADEON_PACKET_TYPE3:
3539                               pkt.opcode = RADEON_CP_PACKET3_GET_OPCODE(ib->ptr[idx]);
3540                               ret = evergreen_vm_packet3_check(rdev, ib->ptr, &pkt);
3541                               idx += pkt.count + 2;
3542                               break;
3543                     default:
3544                               dev_err(rdev->dev, "Unknown packet type %d !\n", pkt.type);
3545                               ret = -EINVAL;
3546                               break;
3547                     }
3548                     if (ret)
3549                               break;
3550           } while (idx < ib->length_dw);
3551 
3552           return ret;
3553 }
3554 
3555 /**
3556  * evergreen_dma_ib_parse() - parse the DMA IB for VM
3557  * @rdev: radeon_device pointer
3558  * @ib:   radeon_ib pointer
3559  *
3560  * Parses the DMA IB from the VM CS ioctl
3561  * checks for errors. (Cayman-SI)
3562  * Returns 0 for success and an error on failure.
3563  **/
evergreen_dma_ib_parse(struct radeon_device * rdev,struct radeon_ib * ib)3564 int evergreen_dma_ib_parse(struct radeon_device *rdev, struct radeon_ib *ib)
3565 {
3566           u32 idx = 0;
3567           u32 header, cmd, count, sub_cmd;
3568 
3569           do {
3570                     header = ib->ptr[idx];
3571                     cmd = GET_DMA_CMD(header);
3572                     count = GET_DMA_COUNT(header);
3573                     sub_cmd = GET_DMA_SUB_CMD(header);
3574 
3575                     switch (cmd) {
3576                     case DMA_PACKET_WRITE:
3577                               switch (sub_cmd) {
3578                               /* tiled */
3579                               case 8:
3580                                         idx += count + 7;
3581                                         break;
3582                               /* linear */
3583                               case 0:
3584                                         idx += count + 3;
3585                                         break;
3586                               default:
3587                                         DRM_ERROR("bad DMA_PACKET_WRITE [%6d] 0x%08x sub cmd is not 0 or 8\n", idx, ib->ptr[idx]);
3588                                         return -EINVAL;
3589                               }
3590                               break;
3591                     case DMA_PACKET_COPY:
3592                               switch (sub_cmd) {
3593                               /* Copy L2L, DW aligned */
3594                               case 0x00:
3595                                         idx += 5;
3596                                         break;
3597                               /* Copy L2T/T2L */
3598                               case 0x08:
3599                                         idx += 9;
3600                                         break;
3601                               /* Copy L2L, byte aligned */
3602                               case 0x40:
3603                                         idx += 5;
3604                                         break;
3605                               /* Copy L2L, partial */
3606                               case 0x41:
3607                                         idx += 9;
3608                                         break;
3609                               /* Copy L2L, DW aligned, broadcast */
3610                               case 0x44:
3611                                         idx += 7;
3612                                         break;
3613                               /* Copy L2T Frame to Field */
3614                               case 0x48:
3615                                         idx += 10;
3616                                         break;
3617                               /* Copy L2T/T2L, partial */
3618                               case 0x49:
3619                                         idx += 12;
3620                                         break;
3621                               /* Copy L2T broadcast */
3622                               case 0x4b:
3623                                         idx += 10;
3624                                         break;
3625                               /* Copy L2T/T2L (tile units) */
3626                               case 0x4c:
3627                                         idx += 9;
3628                                         break;
3629                               /* Copy T2T, partial (tile units) */
3630                               case 0x4d:
3631                                         idx += 13;
3632                                         break;
3633                               /* Copy L2T broadcast (tile units) */
3634                               case 0x4f:
3635                                         idx += 10;
3636                                         break;
3637                               default:
3638                                         DRM_ERROR("bad DMA_PACKET_COPY [%6d] 0x%08x invalid sub cmd\n", idx, ib->ptr[idx]);
3639                                         return -EINVAL;
3640                               }
3641                               break;
3642                     case DMA_PACKET_CONSTANT_FILL:
3643                               idx += 4;
3644                               break;
3645                     case DMA_PACKET_NOP:
3646                               idx += 1;
3647                               break;
3648                     default:
3649                               DRM_ERROR("Unknown packet type %d at %d !\n", cmd, idx);
3650                               return -EINVAL;
3651                     }
3652           } while (idx < ib->length_dw);
3653 
3654           return 0;
3655 }
3656