xref: /dragonfly/sys/dev/drm/i915/intel_engine_cs.c (revision 3f2dd94a569761201b5b0a18b2f697f97fe1b9dc)
1 /*
2  * Copyright © 2016 Intel Corporation
3  *
4  * Permission is hereby granted, free of charge, to any person obtaining a
5  * copy of this software and associated documentation files (the "Software"),
6  * to deal in the Software without restriction, including without limitation
7  * the rights to use, copy, modify, merge, publish, distribute, sublicense,
8  * and/or sell copies of the Software, and to permit persons to whom the
9  * Software is furnished to do so, subject to the following conditions:
10  *
11  * The above copyright notice and this permission notice (including the next
12  * paragraph) shall be included in all copies or substantial portions of the
13  * Software.
14  *
15  * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
16  * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
17  * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT.  IN NO EVENT SHALL
18  * THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
19  * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING
20  * FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS
21  * IN THE SOFTWARE.
22  *
23  */
24 
25 #include <drm/drm_print.h>
26 
27 #include "i915_drv.h"
28 #include "i915_vgpu.h"
29 #include "intel_ringbuffer.h"
30 #include "intel_lrc.h"
31 
/*
 * Haswell does have the CXT_SIZE register however it does not appear to be
 * valid. Now, docs explain in dwords what is in the context object. The full
 * size is 70720 bytes, however, the power context and execlist context will
 * never be saved (power context is stored elsewhere, and execlists don't work
 * on HSW) - so the final size, including the extra state required for the
 * Resource Streamer, is 66944 bytes, which rounds to 17 pages.
 */
#define HSW_CXT_TOTAL_SIZE		(17 * PAGE_SIZE)
/* Same as Haswell, but 72064 bytes now. */
#define GEN8_CXT_TOTAL_SIZE		(18 * PAGE_SIZE)

/* Render class context image sizes returned for gen8 (execlists), gen9, gen10. */
#define GEN8_LR_CONTEXT_RENDER_SIZE	(20 * PAGE_SIZE)
#define GEN9_LR_CONTEXT_RENDER_SIZE	(22 * PAGE_SIZE)
#define GEN10_LR_CONTEXT_RENDER_SIZE	(18 * PAGE_SIZE)

/* Context image size returned for every non-render class on gen8+. */
#define GEN8_LR_CONTEXT_OTHER_SIZE	(2 * PAGE_SIZE)
48 
/*
 * Per engine class description: the name prefix used when building an
 * engine's name, and the init hook for each submission mode.
 */
struct engine_class_info {
	/* Prefix combined with the instance number to form engine->name. */
	const char *name;
	/* Init hook used when i915_modparams.enable_execlists is not set. */
	int (*init_legacy)(struct intel_engine_cs *engine);
	/* Init hook used when i915_modparams.enable_execlists is set. */
	int (*init_execlists)(struct intel_engine_cs *engine);
};
54 
55 static const struct engine_class_info intel_engine_classes[] = {
56           [RENDER_CLASS] = {
57                     .name = "rcs",
58                     .init_execlists = logical_render_ring_init,
59                     .init_legacy = intel_init_render_ring_buffer,
60           },
61           [COPY_ENGINE_CLASS] = {
62                     .name = "bcs",
63                     .init_execlists = logical_xcs_ring_init,
64                     .init_legacy = intel_init_blt_ring_buffer,
65           },
66           [VIDEO_DECODE_CLASS] = {
67                     .name = "vcs",
68                     .init_execlists = logical_xcs_ring_init,
69                     .init_legacy = intel_init_bsd_ring_buffer,
70           },
71           [VIDEO_ENHANCEMENT_CLASS] = {
72                     .name = "vecs",
73                     .init_execlists = logical_xcs_ring_init,
74                     .init_legacy = intel_init_vebox_ring_buffer,
75           },
76 };
77 
78 struct engine_info {
79           unsigned int hw_id;
80           unsigned int uabi_id;
81           u8 class;
82           u8 instance;
83           u32 mmio_base;
84           unsigned irq_shift;
85 };
86 
87 static const struct engine_info intel_engines[] = {
88           [RCS] = {
89                     .hw_id = RCS_HW,
90                     .uabi_id = I915_EXEC_RENDER,
91                     .class = RENDER_CLASS,
92                     .instance = 0,
93                     .mmio_base = RENDER_RING_BASE,
94                     .irq_shift = GEN8_RCS_IRQ_SHIFT,
95           },
96           [BCS] = {
97                     .hw_id = BCS_HW,
98                     .uabi_id = I915_EXEC_BLT,
99                     .class = COPY_ENGINE_CLASS,
100                     .instance = 0,
101                     .mmio_base = BLT_RING_BASE,
102                     .irq_shift = GEN8_BCS_IRQ_SHIFT,
103           },
104           [VCS] = {
105                     .hw_id = VCS_HW,
106                     .uabi_id = I915_EXEC_BSD,
107                     .class = VIDEO_DECODE_CLASS,
108                     .instance = 0,
109                     .mmio_base = GEN6_BSD_RING_BASE,
110                     .irq_shift = GEN8_VCS1_IRQ_SHIFT,
111           },
112           [VCS2] = {
113                     .hw_id = VCS2_HW,
114                     .uabi_id = I915_EXEC_BSD,
115                     .class = VIDEO_DECODE_CLASS,
116                     .instance = 1,
117                     .mmio_base = GEN8_BSD2_RING_BASE,
118                     .irq_shift = GEN8_VCS2_IRQ_SHIFT,
119           },
120           [VECS] = {
121                     .hw_id = VECS_HW,
122                     .uabi_id = I915_EXEC_VEBOX,
123                     .class = VIDEO_ENHANCEMENT_CLASS,
124                     .instance = 0,
125                     .mmio_base = VEBOX_RING_BASE,
126                     .irq_shift = GEN8_VECS_IRQ_SHIFT,
127           },
128 };
129 
130 /**
131  * ___intel_engine_context_size() - return the size of the context for an engine
132  * @dev_priv: i915 device private
133  * @class: engine class
134  *
135  * Each engine class may require a different amount of space for a context
136  * image.
137  *
138  * Return: size (in bytes) of an engine class specific context image
139  *
140  * Note: this size includes the HWSP, which is part of the context image
141  * in LRC mode, but does not include the "shared data page" used with
142  * GuC submission. The caller should account for this if using the GuC.
143  */
144 static u32
__intel_engine_context_size(struct drm_i915_private * dev_priv,u8 class)145 __intel_engine_context_size(struct drm_i915_private *dev_priv, u8 class)
146 {
147           u32 cxt_size;
148 
149           BUILD_BUG_ON(I915_GTT_PAGE_SIZE != PAGE_SIZE);
150 
151           switch (class) {
152           case RENDER_CLASS:
153                     switch (INTEL_GEN(dev_priv)) {
154                     default:
155                               MISSING_CASE(INTEL_GEN(dev_priv));
156                     case 10:
157                               return GEN10_LR_CONTEXT_RENDER_SIZE;
158                     case 9:
159                               return GEN9_LR_CONTEXT_RENDER_SIZE;
160                     case 8:
161                               return i915_modparams.enable_execlists ?
162                                      GEN8_LR_CONTEXT_RENDER_SIZE :
163                                      GEN8_CXT_TOTAL_SIZE;
164                     case 7:
165                               if (IS_HASWELL(dev_priv))
166                                         return HSW_CXT_TOTAL_SIZE;
167 
168                               cxt_size = I915_READ(GEN7_CXT_SIZE);
169                               return round_up(GEN7_CXT_TOTAL_SIZE(cxt_size) * 64,
170                                                   PAGE_SIZE);
171                     case 6:
172                               cxt_size = I915_READ(CXT_SIZE);
173                               return round_up(GEN6_CXT_TOTAL_SIZE(cxt_size) * 64,
174                                                   PAGE_SIZE);
175                     case 5:
176                     case 4:
177                     case 3:
178                     case 2:
179                     /* For the special day when i810 gets merged. */
180                     case 1:
181                               return 0;
182                     }
183                     break;
184           default:
185                     MISSING_CASE(class);
186           case VIDEO_DECODE_CLASS:
187           case VIDEO_ENHANCEMENT_CLASS:
188           case COPY_ENGINE_CLASS:
189                     if (INTEL_GEN(dev_priv) < 8)
190                               return 0;
191                     return GEN8_LR_CONTEXT_OTHER_SIZE;
192           }
193 }
194 
195 static int
intel_engine_setup(struct drm_i915_private * dev_priv,enum intel_engine_id id)196 intel_engine_setup(struct drm_i915_private *dev_priv,
197                        enum intel_engine_id id)
198 {
199           const struct engine_info *info = &intel_engines[id];
200           const struct engine_class_info *class_info;
201           struct intel_engine_cs *engine;
202 
203           GEM_BUG_ON(info->class >= ARRAY_SIZE(intel_engine_classes));
204           class_info = &intel_engine_classes[info->class];
205 
206           GEM_BUG_ON(dev_priv->engine[id]);
207           engine = kzalloc(sizeof(*engine), GFP_KERNEL);
208           if (!engine)
209                     return -ENOMEM;
210 
211           engine->id = id;
212           engine->i915 = dev_priv;
213           WARN_ON(snprintf(engine->name, sizeof(engine->name), "%s%u",
214                                class_info->name, info->instance) >=
215                     sizeof(engine->name));
216           engine->uabi_id = info->uabi_id;
217           engine->hw_id = engine->guc_id = info->hw_id;
218           engine->mmio_base = info->mmio_base;
219           engine->irq_shift = info->irq_shift;
220           engine->class = info->class;
221           engine->instance = info->instance;
222 
223           engine->context_size = __intel_engine_context_size(dev_priv,
224                                                                          engine->class);
225           if (WARN_ON(engine->context_size > BIT(20)))
226                     engine->context_size = 0;
227 
228           /* Nothing to do here, execute in order of dependencies */
229           engine->schedule = NULL;
230 
231           ATOMIC_INIT_NOTIFIER_HEAD(&engine->context_status_notifier);
232 
233           dev_priv->engine[id] = engine;
234           return 0;
235 }
236 
237 /**
238  * intel_engines_init_mmio() - allocate and prepare the Engine Command Streamers
239  * @dev_priv: i915 device private
240  *
241  * Return: non-zero if the initialization failed.
242  */
intel_engines_init_mmio(struct drm_i915_private * dev_priv)243 int intel_engines_init_mmio(struct drm_i915_private *dev_priv)
244 {
245           struct intel_device_info *device_info = mkwrite_device_info(dev_priv);
246           const unsigned int ring_mask = INTEL_INFO(dev_priv)->ring_mask;
247           struct intel_engine_cs *engine;
248           enum intel_engine_id id;
249           unsigned int mask = 0;
250           unsigned int i;
251           int err;
252 
253           WARN_ON(ring_mask == 0);
254           WARN_ON(ring_mask &
255                     GENMASK(sizeof(mask) * BITS_PER_BYTE - 1, I915_NUM_ENGINES));
256 
257           for (i = 0; i < ARRAY_SIZE(intel_engines); i++) {
258                     if (!HAS_ENGINE(dev_priv, i))
259                               continue;
260 
261                     err = intel_engine_setup(dev_priv, i);
262                     if (err)
263                               goto cleanup;
264 
265                     mask |= ENGINE_MASK(i);
266           }
267 
268           /*
269            * Catch failures to update intel_engines table when the new engines
270            * are added to the driver by a warning and disabling the forgotten
271            * engines.
272            */
273           if (WARN_ON(mask != ring_mask))
274                     device_info->ring_mask = mask;
275 
276           /* We always presume we have at least RCS available for later probing */
277           if (WARN_ON(!HAS_ENGINE(dev_priv, RCS))) {
278                     err = -ENODEV;
279                     goto cleanup;
280           }
281 
282           device_info->num_rings = hweight32(mask);
283 
284           return 0;
285 
286 cleanup:
287           for_each_engine(engine, dev_priv, id)
288                     kfree(engine);
289           return err;
290 }
291 
292 /**
293  * intel_engines_init() - init the Engine Command Streamers
294  * @dev_priv: i915 device private
295  *
296  * Return: non-zero if the initialization failed.
297  */
intel_engines_init(struct drm_i915_private * dev_priv)298 int intel_engines_init(struct drm_i915_private *dev_priv)
299 {
300           struct intel_engine_cs *engine;
301           enum intel_engine_id id, err_id;
302           int err;
303 
304           for_each_engine(engine, dev_priv, id) {
305                     const struct engine_class_info *class_info =
306                               &intel_engine_classes[engine->class];
307                     int (*init)(struct intel_engine_cs *engine);
308 
309                     if (i915_modparams.enable_execlists)
310                               init = class_info->init_execlists;
311                     else
312                               init = class_info->init_legacy;
313 
314                     err = -EINVAL;
315                     err_id = id;
316 
317                     if (GEM_WARN_ON(!init))
318                               goto cleanup;
319 
320                     err = init(engine);
321                     if (err)
322                               goto cleanup;
323 
324                     GEM_BUG_ON(!engine->submit_request);
325           }
326 
327           return 0;
328 
329 cleanup:
330           for_each_engine(engine, dev_priv, id) {
331                     if (id >= err_id) {
332                               kfree(engine);
333                               dev_priv->engine[id] = NULL;
334                     } else {
335                               dev_priv->gt.cleanup_engine(engine);
336                     }
337           }
338           return err;
339 }
340 
/*
 * Reset the engine's semaphore tracking state, write @seqno into the
 * hardware status page and wake any waiters.
 */
void intel_engine_init_global_seqno(struct intel_engine_cs *engine, u32 seqno)
{
	struct drm_i915_private *dev_priv = engine->i915;

	/* Our semaphore implementation is strictly monotonic (i.e. we proceed
	 * so long as the semaphore value in the register/page is greater
	 * than the sync value), so whenever we reset the seqno,
	 * so long as we reset the tracking semaphore value to 0, it will
	 * always be before the next request's seqno. If we don't reset
	 * the semaphore value, then when the seqno moves backwards all
	 * future waits will complete instantly (causing rendering corruption).
	 */
	if (IS_GEN6(dev_priv) || IS_GEN7(dev_priv)) {
		I915_WRITE(RING_SYNC_0(engine->mmio_base), 0);
		I915_WRITE(RING_SYNC_1(engine->mmio_base), 0);
		if (HAS_VEBOX(dev_priv))
			I915_WRITE(RING_SYNC_2(engine->mmio_base), 0);
	}

	if (dev_priv->semaphore) {
		const size_t len = I915_NUM_ENGINES * gen8_semaphore_seqno_size;
		struct page *page = i915_vma_first_page(dev_priv->semaphore);
		void *semaphores;
		void *slot;

		/* Semaphores are in noncoherent memory, flush to be safe */
		semaphores = kmap_atomic(page);
		slot = semaphores + GEN8_SEMAPHORE_OFFSET(engine->id, 0);
		memset(slot, 0, len);
		drm_clflush_virt_range(slot, len);
		kunmap_atomic(semaphores);
	}

	intel_write_status_page(engine, I915_GEM_HWS_INDEX, seqno);
	clear_bit(ENGINE_IRQ_BREADCRUMB, &engine->irq_posted);

	/* After manually advancing the seqno, fake the interrupt in case
	 * there are any waiters for that seqno.
	 */
	intel_engine_wakeup(engine);

	GEM_BUG_ON(intel_engine_get_seqno(engine) != seqno);
}
382 
intel_engine_init_timeline(struct intel_engine_cs * engine)383 static void intel_engine_init_timeline(struct intel_engine_cs *engine)
384 {
385           engine->timeline = &engine->i915->gt.global_timeline.engine[engine->id];
386 }
387 
csb_force_mmio(struct drm_i915_private * i915)388 static bool csb_force_mmio(struct drm_i915_private *i915)
389 {
390           /*
391            * IOMMU adds unpredictable latency causing the CSB write (from the
392            * GPU into the HWSP) to only be visible some time after the interrupt
393            * (missed breadcrumb syndrome).
394            */
395           if (intel_vtd_active())
396                     return true;
397 
398           /* Older GVT emulation depends upon intercepting CSB mmio */
399           if (intel_vgpu_active(i915) && !intel_vgpu_has_hwsp_emulation(i915))
400                     return true;
401 
402           return false;
403 }
404 
intel_engine_init_execlist(struct intel_engine_cs * engine)405 static void intel_engine_init_execlist(struct intel_engine_cs *engine)
406 {
407           struct intel_engine_execlists * const execlists = &engine->execlists;
408 
409           execlists->csb_use_mmio = csb_force_mmio(engine->i915);
410 
411           execlists->port_mask = 1;
412           GEM_BUG_ON(!is_power_of_2(execlists_num_ports(execlists))); /* From Linux 5.0 */
413           GEM_BUG_ON(execlists_num_ports(execlists) > EXECLIST_MAX_PORTS);
414 
415           execlists->queue = LINUX_RB_ROOT;
416           execlists->first = NULL;
417 }
418 
419 /**
420  * intel_engines_setup_common - setup engine state not requiring hw access
421  * @engine: Engine to setup.
422  *
423  * Initializes @engine@ structure members shared between legacy and execlists
424  * submission modes which do not require hardware access.
425  *
426  * Typically done early in the submission mode specific engine setup stage.
427  */
intel_engine_setup_common(struct intel_engine_cs * engine)428 void intel_engine_setup_common(struct intel_engine_cs *engine)
429 {
430           intel_engine_init_execlist(engine);
431 
432           intel_engine_init_timeline(engine);
433           intel_engine_init_hangcheck(engine);
434           i915_gem_batch_pool_init(engine, &engine->batch_pool);
435 
436           intel_engine_init_cmd_parser(engine);
437 }
438 
intel_engine_create_scratch(struct intel_engine_cs * engine,int size)439 int intel_engine_create_scratch(struct intel_engine_cs *engine, int size)
440 {
441           struct drm_i915_gem_object *obj;
442           struct i915_vma *vma;
443           int ret;
444 
445           WARN_ON(engine->scratch);
446 
447           obj = i915_gem_object_create_stolen(engine->i915, size);
448           if (!obj)
449                     obj = i915_gem_object_create_internal(engine->i915, size);
450           if (IS_ERR(obj)) {
451                     DRM_ERROR("Failed to allocate scratch page\n");
452                     return PTR_ERR(obj);
453           }
454 
455           vma = i915_vma_instance(obj, &engine->i915->ggtt.base, NULL);
456           if (IS_ERR(vma)) {
457                     ret = PTR_ERR(vma);
458                     goto err_unref;
459           }
460 
461           ret = i915_vma_pin(vma, 0, 4096, PIN_GLOBAL | PIN_HIGH);
462           if (ret)
463                     goto err_unref;
464 
465           engine->scratch = vma;
466           DRM_DEBUG_DRIVER("%s pipe control offset: 0x%08x\n",
467                                engine->name, i915_ggtt_offset(vma));
468           return 0;
469 
470 err_unref:
471           i915_gem_object_put(obj);
472           return ret;
473 }
474 
intel_engine_cleanup_scratch(struct intel_engine_cs * engine)475 static void intel_engine_cleanup_scratch(struct intel_engine_cs *engine)
476 {
477           i915_vma_unpin_and_release(&engine->scratch);
478 }
479 
cleanup_phys_status_page(struct intel_engine_cs * engine)480 static void cleanup_phys_status_page(struct intel_engine_cs *engine)
481 {
482           struct drm_i915_private *dev_priv = engine->i915;
483 
484           if (!dev_priv->status_page_dmah)
485                     return;
486 
487           drm_pci_free(&dev_priv->drm, dev_priv->status_page_dmah);
488           engine->status_page.page_addr = NULL;
489 }
490 
cleanup_status_page(struct intel_engine_cs * engine)491 static void cleanup_status_page(struct intel_engine_cs *engine)
492 {
493           struct i915_vma *vma;
494           struct drm_i915_gem_object *obj;
495 
496           vma = fetch_and_zero(&engine->status_page.vma);
497           if (!vma)
498                     return;
499 
500           obj = vma->obj;
501 
502           i915_vma_unpin(vma);
503           i915_vma_close(vma);
504 
505           i915_gem_object_unpin_map(obj);
506           __i915_gem_object_release_unless_active(obj);
507 }
508 
init_status_page(struct intel_engine_cs * engine)509 static int init_status_page(struct intel_engine_cs *engine)
510 {
511           struct drm_i915_gem_object *obj;
512           struct i915_vma *vma;
513           unsigned int flags;
514           void *vaddr;
515           int ret;
516 
517           obj = i915_gem_object_create_internal(engine->i915, PAGE_SIZE);
518           if (IS_ERR(obj)) {
519                     DRM_ERROR("Failed to allocate status page\n");
520                     return PTR_ERR(obj);
521           }
522 
523           ret = i915_gem_object_set_cache_level(obj, I915_CACHE_LLC);
524           if (ret)
525                     goto err;
526 
527           vma = i915_vma_instance(obj, &engine->i915->ggtt.base, NULL);
528           if (IS_ERR(vma)) {
529                     ret = PTR_ERR(vma);
530                     goto err;
531           }
532 
533           flags = PIN_GLOBAL;
534           if (!HAS_LLC(engine->i915))
535                     /* On g33, we cannot place HWS above 256MiB, so
536                      * restrict its pinning to the low mappable arena.
537                      * Though this restriction is not documented for
538                      * gen4, gen5, or byt, they also behave similarly
539                      * and hang if the HWS is placed at the top of the
540                      * GTT. To generalise, it appears that all !llc
541                      * platforms have issues with us placing the HWS
542                      * above the mappable region (even though we never
543                      * actually map it).
544                      */
545                     flags |= PIN_MAPPABLE;
546           else
547                     flags |= PIN_HIGH;
548           ret = i915_vma_pin(vma, 0, 4096, flags);
549           if (ret)
550                     goto err;
551 
552           vaddr = i915_gem_object_pin_map(obj, I915_MAP_WB);
553           if (IS_ERR(vaddr)) {
554                     ret = PTR_ERR(vaddr);
555                     goto err_unpin;
556           }
557 
558           engine->status_page.vma = vma;
559           engine->status_page.ggtt_offset = i915_ggtt_offset(vma);
560           engine->status_page.page_addr = memset(vaddr, 0, PAGE_SIZE);
561 
562           DRM_DEBUG_DRIVER("%s hws offset: 0x%08x\n",
563                                engine->name, i915_ggtt_offset(vma));
564           return 0;
565 
566 err_unpin:
567           i915_vma_unpin(vma);
568 err:
569           i915_gem_object_put(obj);
570           return ret;
571 }
572 
init_phys_status_page(struct intel_engine_cs * engine)573 static int init_phys_status_page(struct intel_engine_cs *engine)
574 {
575           struct drm_i915_private *dev_priv = engine->i915;
576 
577           GEM_BUG_ON(engine->id != RCS);
578 
579           dev_priv->status_page_dmah =
580                     drm_pci_alloc(&dev_priv->drm, PAGE_SIZE, PAGE_SIZE);
581           if (!dev_priv->status_page_dmah)
582                     return -ENOMEM;
583 
584           engine->status_page.page_addr = dev_priv->status_page_dmah->vaddr;
585           memset(engine->status_page.page_addr, 0, PAGE_SIZE);
586 
587           return 0;
588 }
589 
590 /**
591  * intel_engines_init_common - initialize cengine state which might require hw access
592  * @engine: Engine to initialize.
593  *
594  * Initializes @engine@ structure members shared between legacy and execlists
595  * submission modes which do require hardware access.
596  *
597  * Typcally done at later stages of submission mode specific engine setup.
598  *
599  * Returns zero on success or an error code on failure.
600  */
intel_engine_init_common(struct intel_engine_cs * engine)601 int intel_engine_init_common(struct intel_engine_cs *engine)
602 {
603           struct intel_ring *ring;
604           int ret;
605 
606           engine->set_default_submission(engine);
607 
608           /* We may need to do things with the shrinker which
609            * require us to immediately switch back to the default
610            * context. This can cause a problem as pinning the
611            * default context also requires GTT space which may not
612            * be available. To avoid this we always pin the default
613            * context.
614            */
615           ring = engine->context_pin(engine, engine->i915->kernel_context);
616           if (IS_ERR(ring))
617                     return PTR_ERR(ring);
618 
619           /*
620            * Similarly the preempt context must always be available so that
621            * we can interrupt the engine at any time.
622            */
623           if (INTEL_INFO(engine->i915)->has_logical_ring_preemption) {
624                     ring = engine->context_pin(engine,
625                                                      engine->i915->preempt_context);
626                     if (IS_ERR(ring)) {
627                               ret = PTR_ERR(ring);
628                               goto err_unpin_kernel;
629                     }
630           }
631 
632           ret = intel_engine_init_breadcrumbs(engine);
633           if (ret)
634                     goto err_unpin_preempt;
635 
636           ret = i915_gem_render_state_init(engine);
637           if (ret)
638                     goto err_breadcrumbs;
639 
640           if (HWS_NEEDS_PHYSICAL(engine->i915))
641                     ret = init_phys_status_page(engine);
642           else
643                     ret = init_status_page(engine);
644           if (ret)
645                     goto err_rs_fini;
646 
647           return 0;
648 
649 err_rs_fini:
650           i915_gem_render_state_fini(engine);
651 err_breadcrumbs:
652           intel_engine_fini_breadcrumbs(engine);
653 err_unpin_preempt:
654           if (INTEL_INFO(engine->i915)->has_logical_ring_preemption)
655                     engine->context_unpin(engine, engine->i915->preempt_context);
656 err_unpin_kernel:
657           engine->context_unpin(engine, engine->i915->kernel_context);
658           return ret;
659 }
660 
661 /**
662  * intel_engines_cleanup_common - cleans up the engine state created by
663  *                                the common initiailizers.
664  * @engine: Engine to cleanup.
665  *
666  * This cleans up everything created by the common helpers.
667  */
intel_engine_cleanup_common(struct intel_engine_cs * engine)668 void intel_engine_cleanup_common(struct intel_engine_cs *engine)
669 {
670           intel_engine_cleanup_scratch(engine);
671 
672           if (HWS_NEEDS_PHYSICAL(engine->i915))
673                     cleanup_phys_status_page(engine);
674           else
675                     cleanup_status_page(engine);
676 
677           i915_gem_render_state_fini(engine);
678           intel_engine_fini_breadcrumbs(engine);
679           intel_engine_cleanup_cmd_parser(engine);
680           i915_gem_batch_pool_fini(&engine->batch_pool);
681 
682           if (INTEL_INFO(engine->i915)->has_logical_ring_preemption)
683                     engine->context_unpin(engine, engine->i915->preempt_context);
684           engine->context_unpin(engine, engine->i915->kernel_context);
685 }
686 
intel_engine_get_active_head(struct intel_engine_cs * engine)687 u64 intel_engine_get_active_head(struct intel_engine_cs *engine)
688 {
689           struct drm_i915_private *dev_priv = engine->i915;
690           u64 acthd;
691 
692           if (INTEL_GEN(dev_priv) >= 8)
693                     acthd = I915_READ64_2x32(RING_ACTHD(engine->mmio_base),
694                                                    RING_ACTHD_UDW(engine->mmio_base));
695           else if (INTEL_GEN(dev_priv) >= 4)
696                     acthd = I915_READ(RING_ACTHD(engine->mmio_base));
697           else
698                     acthd = I915_READ(ACTHD);
699 
700           return acthd;
701 }
702 
intel_engine_get_last_batch_head(struct intel_engine_cs * engine)703 u64 intel_engine_get_last_batch_head(struct intel_engine_cs *engine)
704 {
705           struct drm_i915_private *dev_priv = engine->i915;
706           u64 bbaddr;
707 
708           if (INTEL_GEN(dev_priv) >= 8)
709                     bbaddr = I915_READ64_2x32(RING_BBADDR(engine->mmio_base),
710                                                     RING_BBADDR_UDW(engine->mmio_base));
711           else
712                     bbaddr = I915_READ(RING_BBADDR(engine->mmio_base));
713 
714           return bbaddr;
715 }
716 
i915_cache_level_str(struct drm_i915_private * i915,int type)717 const char *i915_cache_level_str(struct drm_i915_private *i915, int type)
718 {
719           switch (type) {
720           case I915_CACHE_NONE: return " uncached";
721           case I915_CACHE_LLC: return HAS_LLC(i915) ? " LLC" : " snooped";
722           case I915_CACHE_L3_LLC: return " L3+LLC";
723           case I915_CACHE_WT: return " WT";
724           default: return "";
725           }
726 }
727 
728 static inline uint32_t
read_subslice_reg(struct drm_i915_private * dev_priv,int slice,int subslice,i915_reg_t reg)729 read_subslice_reg(struct drm_i915_private *dev_priv, int slice,
730                       int subslice, i915_reg_t reg)
731 {
732           uint32_t mcr;
733           uint32_t ret;
734           enum forcewake_domains fw_domains;
735 
736           fw_domains = intel_uncore_forcewake_for_reg(dev_priv, reg,
737                                                                 FW_REG_READ);
738           fw_domains |= intel_uncore_forcewake_for_reg(dev_priv,
739                                                                  GEN8_MCR_SELECTOR,
740                                                                  FW_REG_READ | FW_REG_WRITE);
741 
742           spin_lock_irq(&dev_priv->uncore.lock);
743           intel_uncore_forcewake_get__locked(dev_priv, fw_domains);
744 
745           mcr = I915_READ_FW(GEN8_MCR_SELECTOR);
746           /*
747            * The HW expects the slice and sublice selectors to be reset to 0
748            * after reading out the registers.
749            */
750           WARN_ON_ONCE(mcr & (GEN8_MCR_SLICE_MASK | GEN8_MCR_SUBSLICE_MASK));
751           mcr &= ~(GEN8_MCR_SLICE_MASK | GEN8_MCR_SUBSLICE_MASK);
752           mcr |= GEN8_MCR_SLICE(slice) | GEN8_MCR_SUBSLICE(subslice);
753           I915_WRITE_FW(GEN8_MCR_SELECTOR, mcr);
754 
755           ret = I915_READ_FW(reg);
756 
757           mcr &= ~(GEN8_MCR_SLICE_MASK | GEN8_MCR_SUBSLICE_MASK);
758           I915_WRITE_FW(GEN8_MCR_SELECTOR, mcr);
759 
760           intel_uncore_forcewake_put__locked(dev_priv, fw_domains);
761           spin_unlock_irq(&dev_priv->uncore.lock);
762 
763           return ret;
764 }
765 
766 /* NB: please notice the memset */
intel_engine_get_instdone(struct intel_engine_cs * engine,struct intel_instdone * instdone)767 void intel_engine_get_instdone(struct intel_engine_cs *engine,
768                                      struct intel_instdone *instdone)
769 {
770           struct drm_i915_private *dev_priv = engine->i915;
771           u32 mmio_base = engine->mmio_base;
772           int slice;
773           int subslice;
774 
775           memset(instdone, 0, sizeof(*instdone));
776 
777           switch (INTEL_GEN(dev_priv)) {
778           default:
779                     instdone->instdone = I915_READ(RING_INSTDONE(mmio_base));
780 
781                     if (engine->id != RCS)
782                               break;
783 
784                     instdone->slice_common = I915_READ(GEN7_SC_INSTDONE);
785                     for_each_instdone_slice_subslice(dev_priv, slice, subslice) {
786                               instdone->sampler[slice][subslice] =
787                                         read_subslice_reg(dev_priv, slice, subslice,
788                                                               GEN7_SAMPLER_INSTDONE);
789                               instdone->row[slice][subslice] =
790                                         read_subslice_reg(dev_priv, slice, subslice,
791                                                               GEN7_ROW_INSTDONE);
792                     }
793                     break;
794           case 7:
795                     instdone->instdone = I915_READ(RING_INSTDONE(mmio_base));
796 
797                     if (engine->id != RCS)
798                               break;
799 
800                     instdone->slice_common = I915_READ(GEN7_SC_INSTDONE);
801                     instdone->sampler[0][0] = I915_READ(GEN7_SAMPLER_INSTDONE);
802                     instdone->row[0][0] = I915_READ(GEN7_ROW_INSTDONE);
803 
804                     break;
805           case 6:
806           case 5:
807           case 4:
808                     instdone->instdone = I915_READ(RING_INSTDONE(mmio_base));
809 
810                     if (engine->id == RCS)
811                               /* HACK: Using the wrong struct member */
812                               instdone->slice_common = I915_READ(GEN4_INSTDONE1);
813                     break;
814           case 3:
815           case 2:
816                     instdone->instdone = I915_READ(GEN2_INSTDONE);
817                     break;
818           }
819 }
820 
/*
 * Append one entry (register address, mask, value) to
 * dev_priv->workarounds and bump its count.
 *
 * Returns 0 on success, or -ENOSPC (after a WARN) when the table already
 * holds I915_MAX_WA_REGS entries.
 */
static int wa_add(struct drm_i915_private *dev_priv,
		  i915_reg_t addr,
		  const u32 mask, const u32 val)
{
	struct i915_workarounds *wa = &dev_priv->workarounds;
	const u32 slot = wa->count;

	if (WARN_ON(slot >= I915_MAX_WA_REGS))
		return -ENOSPC;

	wa->reg[slot].addr = addr;
	wa->reg[slot].value = val;
	wa->reg[slot].mask = mask;

	wa->count++;

	return 0;
}
838 
/*
 * Record a workaround via wa_add().
 *
 * Note the hidden control flow: if wa_add() fails, the *enclosing function*
 * returns the error code. A variable named dev_priv must be in scope at the
 * point of use.
 */
#define WA_REG(addr, mask, val) do { \
		const int wa_err = wa_add(dev_priv, (addr), (mask), (val)); \
		if (wa_err) \
			return wa_err; \
	} while (0)

/* Record a masked-register write that enables the bits in @mask. */
#define WA_SET_BIT_MASKED(addr, mask) \
	WA_REG((addr), (mask), _MASKED_BIT_ENABLE(mask))

/* Record a masked-register write that disables the bits in @mask. */
#define WA_CLR_BIT_MASKED(addr, mask) \
	WA_REG((addr), (mask), _MASKED_BIT_DISABLE(mask))

/* Record a masked-register write that sets the field @mask to @value. */
#define WA_SET_FIELD_MASKED(addr, mask, value) \
	WA_REG((addr), (mask), _MASKED_FIELD(mask, value))
853 
/*
 * Write the MMIO offset of @reg into the next unused
 * RING_FORCE_TO_NONPRIV slot of @engine and bump that engine's whitelist
 * count.
 *
 * Returns 0 on success, or -EINVAL (after a WARN) when all
 * RING_MAX_NONPRIV_SLOTS slots are already in use.
 */
static int wa_ring_whitelist_reg(struct intel_engine_cs *engine,
				 i915_reg_t reg)
{
	struct drm_i915_private *dev_priv = engine->i915;
	struct i915_workarounds *wa = &dev_priv->workarounds;
	const uint32_t slot = wa->hw_whitelist_count[engine->id];

	if (WARN_ON(slot >= RING_MAX_NONPRIV_SLOTS))
		return -EINVAL;

	I915_WRITE(RING_FORCE_TO_NONPRIV(engine->mmio_base, slot),
		   i915_mmio_reg_offset(reg));
	wa->hw_whitelist_count[engine->id]++;

	return 0;
}
870 
gen8_init_workarounds(struct intel_engine_cs * engine)871 static int gen8_init_workarounds(struct intel_engine_cs *engine)
872 {
873           struct drm_i915_private *dev_priv = engine->i915;
874 
875           WA_SET_BIT_MASKED(INSTPM, INSTPM_FORCE_ORDERING);
876 
877           /* WaDisableAsyncFlipPerfMode:bdw,chv */
878           WA_SET_BIT_MASKED(MI_MODE, ASYNC_FLIP_PERF_DISABLE);
879 
880           /* WaDisablePartialInstShootdown:bdw,chv */
881           WA_SET_BIT_MASKED(GEN8_ROW_CHICKEN,
882                                 PARTIAL_INSTRUCTION_SHOOTDOWN_DISABLE);
883 
884           /* Use Force Non-Coherent whenever executing a 3D context. This is a
885            * workaround for for a possible hang in the unlikely event a TLB
886            * invalidation occurs during a PSD flush.
887            */
888           /* WaForceEnableNonCoherent:bdw,chv */
889           /* WaHdcDisableFetchWhenMasked:bdw,chv */
890           WA_SET_BIT_MASKED(HDC_CHICKEN0,
891                                 HDC_DONOT_FETCH_MEM_WHEN_MASKED |
892                                 HDC_FORCE_NON_COHERENT);
893 
894           /* From the Haswell PRM, Command Reference: Registers, CACHE_MODE_0:
895            * "The Hierarchical Z RAW Stall Optimization allows non-overlapping
896            *  polygons in the same 8x4 pixel/sample area to be processed without
897            *  stalling waiting for the earlier ones to write to Hierarchical Z
898            *  buffer."
899            *
900            * This optimization is off by default for BDW and CHV; turn it on.
901            */
902           WA_CLR_BIT_MASKED(CACHE_MODE_0_GEN7, HIZ_RAW_STALL_OPT_DISABLE);
903 
904           /* Wa4x4STCOptimizationDisable:bdw,chv */
905           WA_SET_BIT_MASKED(CACHE_MODE_1, GEN8_4x4_STC_OPTIMIZATION_DISABLE);
906 
907           /*
908            * BSpec recommends 8x4 when MSAA is used,
909            * however in practice 16x4 seems fastest.
910            *
911            * Note that PS/WM thread counts depend on the WIZ hashing
912            * disable bit, which we don't touch here, but it's good
913            * to keep in mind (see 3DSTATE_PS and 3DSTATE_WM).
914            */
915           WA_SET_FIELD_MASKED(GEN7_GT_MODE,
916                                   GEN6_WIZ_HASHING_MASK,
917                                   GEN6_WIZ_HASHING_16x4);
918 
919           return 0;
920 }
921 
bdw_init_workarounds(struct intel_engine_cs * engine)922 static int bdw_init_workarounds(struct intel_engine_cs *engine)
923 {
924           struct drm_i915_private *dev_priv = engine->i915;
925           int ret;
926 
927           ret = gen8_init_workarounds(engine);
928           if (ret)
929                     return ret;
930 
931           /* WaDisableThreadStallDopClockGating:bdw (pre-production) */
932           WA_SET_BIT_MASKED(GEN8_ROW_CHICKEN, STALL_DOP_GATING_DISABLE);
933 
934           /* WaDisableDopClockGating:bdw
935            *
936            * Also see the related UCGTCL1 write in broadwell_init_clock_gating()
937            * to disable EUTC clock gating.
938            */
939           WA_SET_BIT_MASKED(GEN7_ROW_CHICKEN2,
940                                 DOP_CLOCK_GATING_DISABLE);
941 
942           WA_SET_BIT_MASKED(HALF_SLICE_CHICKEN3,
943                                 GEN8_SAMPLER_POWER_BYPASS_DIS);
944 
945           WA_SET_BIT_MASKED(HDC_CHICKEN0,
946                                 /* WaForceContextSaveRestoreNonCoherent:bdw */
947                                 HDC_FORCE_CONTEXT_SAVE_RESTORE_NON_COHERENT |
948                                 /* WaDisableFenceDestinationToSLM:bdw (pre-prod) */
949                                 (IS_BDW_GT3(dev_priv) ? HDC_FENCE_DEST_SLM_DISABLE : 0));
950 
951           return 0;
952 }
953 
chv_init_workarounds(struct intel_engine_cs * engine)954 static int chv_init_workarounds(struct intel_engine_cs *engine)
955 {
956           struct drm_i915_private *dev_priv = engine->i915;
957           int ret;
958 
959           ret = gen8_init_workarounds(engine);
960           if (ret)
961                     return ret;
962 
963           /* WaDisableThreadStallDopClockGating:chv */
964           WA_SET_BIT_MASKED(GEN8_ROW_CHICKEN, STALL_DOP_GATING_DISABLE);
965 
966           /* Improve HiZ throughput on CHV. */
967           WA_SET_BIT_MASKED(HIZ_CHICKEN, CHV_HZ_8X8_MODE_IN_1X);
968 
969           return 0;
970 }
971 
gen9_init_workarounds(struct intel_engine_cs * engine)972 static int gen9_init_workarounds(struct intel_engine_cs *engine)
973 {
974           struct drm_i915_private *dev_priv = engine->i915;
975           int ret;
976 
977           /* WaConextSwitchWithConcurrentTLBInvalidate:skl,bxt,kbl,glk,cfl */
978           I915_WRITE(GEN9_CSFE_CHICKEN1_RCS, _MASKED_BIT_ENABLE(GEN9_PREEMPT_GPGPU_SYNC_SWITCH_DISABLE));
979 
980           /* WaEnableLbsSlaRetryTimerDecrement:skl,bxt,kbl,glk,cfl */
981           I915_WRITE(BDW_SCRATCH1, I915_READ(BDW_SCRATCH1) |
982                        GEN9_LBS_SLA_RETRY_TIMER_DECREMENT_ENABLE);
983 
984           /* WaDisableKillLogic:bxt,skl,kbl */
985           if (!IS_COFFEELAKE(dev_priv))
986                     I915_WRITE(GAM_ECOCHK, I915_READ(GAM_ECOCHK) |
987                                  ECOCHK_DIS_TLB);
988 
989           if (HAS_LLC(dev_priv)) {
990                     /* WaCompressedResourceSamplerPbeMediaNewHashMode:skl,kbl
991                      *
992                      * Must match Display Engine. See
993                      * WaCompressedResourceDisplayNewHashMode.
994                      */
995                     WA_SET_BIT_MASKED(COMMON_SLICE_CHICKEN2,
996                                           GEN9_PBE_COMPRESSED_HASH_SELECTION);
997                     WA_SET_BIT_MASKED(GEN9_HALF_SLICE_CHICKEN7,
998                                           GEN9_SAMPLER_HASH_COMPRESSED_READ_ADDR);
999 
1000                     I915_WRITE(MMCD_MISC_CTRL,
1001                                  I915_READ(MMCD_MISC_CTRL) |
1002                                  MMCD_PCLA |
1003                                  MMCD_HOTSPOT_EN);
1004           }
1005 
1006           /* WaClearFlowControlGpgpuContextSave:skl,bxt,kbl,glk,cfl */
1007           /* WaDisablePartialInstShootdown:skl,bxt,kbl,glk,cfl */
1008           WA_SET_BIT_MASKED(GEN8_ROW_CHICKEN,
1009                                 FLOW_CONTROL_ENABLE |
1010                                 PARTIAL_INSTRUCTION_SHOOTDOWN_DISABLE);
1011 
1012           /* Syncing dependencies between camera and graphics:skl,bxt,kbl */
1013           if (!IS_COFFEELAKE(dev_priv))
1014                     WA_SET_BIT_MASKED(HALF_SLICE_CHICKEN3,
1015                                           GEN9_DISABLE_OCL_OOB_SUPPRESS_LOGIC);
1016 
1017           /* WaDisableDgMirrorFixInHalfSliceChicken5:bxt */
1018           if (IS_BXT_REVID(dev_priv, 0, BXT_REVID_A1))
1019                     WA_CLR_BIT_MASKED(GEN9_HALF_SLICE_CHICKEN5,
1020                                           GEN9_DG_MIRROR_FIX_ENABLE);
1021 
1022           /* WaSetDisablePixMaskCammingAndRhwoInCommonSliceChicken:bxt */
1023           if (IS_BXT_REVID(dev_priv, 0, BXT_REVID_A1)) {
1024                     WA_SET_BIT_MASKED(GEN7_COMMON_SLICE_CHICKEN1,
1025                                           GEN9_RHWO_OPTIMIZATION_DISABLE);
1026                     /*
1027                      * WA also requires GEN9_SLICE_COMMON_ECO_CHICKEN0[14:14] to be set
1028                      * but we do that in per ctx batchbuffer as there is an issue
1029                      * with this register not getting restored on ctx restore
1030                      */
1031           }
1032 
1033           /* WaEnableYV12BugFixInHalfSliceChicken7:skl,bxt,kbl,glk,cfl */
1034           /* WaEnableSamplerGPGPUPreemptionSupport:skl,bxt,kbl,cfl */
1035           WA_SET_BIT_MASKED(GEN9_HALF_SLICE_CHICKEN7,
1036                                 GEN9_ENABLE_YV12_BUGFIX |
1037                                 GEN9_ENABLE_GPGPU_PREEMPTION);
1038 
1039           /* Wa4x4STCOptimizationDisable:skl,bxt,kbl,glk,cfl */
1040           /* WaDisablePartialResolveInVc:skl,bxt,kbl,cfl */
1041           WA_SET_BIT_MASKED(CACHE_MODE_1, (GEN8_4x4_STC_OPTIMIZATION_DISABLE |
1042                                                    GEN9_PARTIAL_RESOLVE_IN_VC_DISABLE));
1043 
1044           /* WaCcsTlbPrefetchDisable:skl,bxt,kbl,glk,cfl */
1045           WA_CLR_BIT_MASKED(GEN9_HALF_SLICE_CHICKEN5,
1046                                 GEN9_CCS_TLB_PREFETCH_ENABLE);
1047 
1048           /* WaDisableMaskBasedCammingInRCC:bxt */
1049           if (IS_BXT_REVID(dev_priv, 0, BXT_REVID_A1))
1050                     WA_SET_BIT_MASKED(SLICE_ECO_CHICKEN0,
1051                                           PIXEL_MASK_CAMMING_DISABLE);
1052 
1053           /* WaForceContextSaveRestoreNonCoherent:skl,bxt,kbl,cfl */
1054           WA_SET_BIT_MASKED(HDC_CHICKEN0,
1055                                 HDC_FORCE_CONTEXT_SAVE_RESTORE_NON_COHERENT |
1056                                 HDC_FORCE_CSR_NON_COHERENT_OVR_DISABLE);
1057 
1058           /* WaForceEnableNonCoherent and WaDisableHDCInvalidation are
1059            * both tied to WaForceContextSaveRestoreNonCoherent
1060            * in some hsds for skl. We keep the tie for all gen9. The
1061            * documentation is a bit hazy and so we want to get common behaviour,
1062            * even though there is no clear evidence we would need both on kbl/bxt.
1063            * This area has been source of system hangs so we play it safe
1064            * and mimic the skl regardless of what bspec says.
1065            *
1066            * Use Force Non-Coherent whenever executing a 3D context. This
1067            * is a workaround for a possible hang in the unlikely event
1068            * a TLB invalidation occurs during a PSD flush.
1069            */
1070 
1071           /* WaForceEnableNonCoherent:skl,bxt,kbl,cfl */
1072           WA_SET_BIT_MASKED(HDC_CHICKEN0,
1073                                 HDC_FORCE_NON_COHERENT);
1074 
1075           /* WaDisableHDCInvalidation:skl,bxt,kbl,cfl */
1076           I915_WRITE(GAM_ECOCHK, I915_READ(GAM_ECOCHK) |
1077                        BDW_DISABLE_HDC_INVALIDATION);
1078 
1079           /* WaDisableSamplerPowerBypassForSOPingPong:skl,bxt,kbl,cfl */
1080           if (IS_SKYLAKE(dev_priv) ||
1081               IS_KABYLAKE(dev_priv) ||
1082               IS_COFFEELAKE(dev_priv) ||
1083               IS_BXT_REVID(dev_priv, 0, BXT_REVID_B0))
1084                     WA_SET_BIT_MASKED(HALF_SLICE_CHICKEN3,
1085                                           GEN8_SAMPLER_POWER_BYPASS_DIS);
1086 
1087           /* WaDisableSTUnitPowerOptimization:skl,bxt,kbl,glk,cfl */
1088           WA_SET_BIT_MASKED(HALF_SLICE_CHICKEN2, GEN8_ST_PO_DISABLE);
1089 
1090           /* WaOCLCoherentLineFlush:skl,bxt,kbl,cfl */
1091           I915_WRITE(GEN8_L3SQCREG4, (I915_READ(GEN8_L3SQCREG4) |
1092                                             GEN8_LQSC_FLUSH_COHERENT_LINES));
1093 
1094           /*
1095            * Supporting preemption with fine-granularity requires changes in the
1096            * batch buffer programming. Since we can't break old userspace, we
1097            * need to set our default preemption level to safe value. Userspace is
1098            * still able to use more fine-grained preemption levels, since in
1099            * WaEnablePreemptionGranularityControlByUMD we're whitelisting the
1100            * per-ctx register. As such, WaDisable{3D,GPGPU}MidCmdPreemption are
1101            * not real HW workarounds, but merely a way to start using preemption
1102            * while maintaining old contract with userspace.
1103            */
1104 
1105           /* WaDisable3DMidCmdPreemption:skl,bxt,glk,cfl,[cnl] */
1106           WA_CLR_BIT_MASKED(GEN8_CS_CHICKEN1, GEN9_PREEMPT_3D_OBJECT_LEVEL);
1107 
1108           /* WaDisableGPGPUMidCmdPreemption:skl,bxt,blk,cfl,[cnl] */
1109           WA_SET_FIELD_MASKED(GEN8_CS_CHICKEN1, GEN9_PREEMPT_GPGPU_LEVEL_MASK,
1110                                   GEN9_PREEMPT_GPGPU_COMMAND_LEVEL);
1111 
1112           /* WaVFEStateAfterPipeControlwithMediaStateClear:skl,bxt,glk,cfl */
1113           ret = wa_ring_whitelist_reg(engine, GEN9_CTX_PREEMPT_REG);
1114           if (ret)
1115                     return ret;
1116 
1117           /* WaEnablePreemptionGranularityControlByUMD:skl,bxt,kbl,cfl,[cnl] */
1118           I915_WRITE(GEN7_FF_SLICE_CS_CHICKEN1,
1119                        _MASKED_BIT_ENABLE(GEN9_FFSC_PERCTX_PREEMPT_CTRL));
1120           ret = wa_ring_whitelist_reg(engine, GEN8_CS_CHICKEN1);
1121           if (ret)
1122                     return ret;
1123 
1124           /* WaAllowUMDToModifyHDCChicken1:skl,bxt,kbl,glk,cfl */
1125           ret = wa_ring_whitelist_reg(engine, GEN8_HDC_CHICKEN1);
1126           if (ret)
1127                     return ret;
1128 
1129           return 0;
1130 }
1131 
skl_tune_iz_hashing(struct intel_engine_cs * engine)1132 static int skl_tune_iz_hashing(struct intel_engine_cs *engine)
1133 {
1134           struct drm_i915_private *dev_priv = engine->i915;
1135           u8 vals[3] = { 0, 0, 0 };
1136           unsigned int i;
1137 
1138           for (i = 0; i < 3; i++) {
1139                     u8 ss;
1140 
1141                     /*
1142                      * Only consider slices where one, and only one, subslice has 7
1143                      * EUs
1144                      */
1145                     if (!is_power_of_2(INTEL_INFO(dev_priv)->sseu.subslice_7eu[i]))
1146                               continue;
1147 
1148                     /*
1149                      * subslice_7eu[i] != 0 (because of the check above) and
1150                      * ss_max == 4 (maximum number of subslices possible per slice)
1151                      *
1152                      * ->    0 <= ss <= 3;
1153                      */
1154                     ss = ffs(INTEL_INFO(dev_priv)->sseu.subslice_7eu[i]) - 1;
1155                     vals[i] = 3 - ss;
1156           }
1157 
1158           if (vals[0] == 0 && vals[1] == 0 && vals[2] == 0)
1159                     return 0;
1160 
1161           /* Tune IZ hashing. See intel_device_info_runtime_init() */
1162           WA_SET_FIELD_MASKED(GEN7_GT_MODE,
1163                                   GEN9_IZ_HASHING_MASK(2) |
1164                                   GEN9_IZ_HASHING_MASK(1) |
1165                                   GEN9_IZ_HASHING_MASK(0),
1166                                   GEN9_IZ_HASHING(2, vals[2]) |
1167                                   GEN9_IZ_HASHING(1, vals[1]) |
1168                                   GEN9_IZ_HASHING(0, vals[0]));
1169 
1170           return 0;
1171 }
1172 
skl_init_workarounds(struct intel_engine_cs * engine)1173 static int skl_init_workarounds(struct intel_engine_cs *engine)
1174 {
1175           struct drm_i915_private *dev_priv = engine->i915;
1176           int ret;
1177 
1178           ret = gen9_init_workarounds(engine);
1179           if (ret)
1180                     return ret;
1181 
1182           /* WaEnableGapsTsvCreditFix:skl */
1183           I915_WRITE(GEN8_GARBCNTL, (I915_READ(GEN8_GARBCNTL) |
1184                                            GEN9_GAPS_TSV_CREDIT_DISABLE));
1185 
1186           /* WaDisableGafsUnitClkGating:skl */
1187           I915_WRITE(GEN7_UCGCTL4, (I915_READ(GEN7_UCGCTL4) |
1188                                           GEN8_EU_GAUNIT_CLOCK_GATE_DISABLE));
1189 
1190           /* WaInPlaceDecompressionHang:skl */
1191           if (IS_SKL_REVID(dev_priv, SKL_REVID_H0, REVID_FOREVER))
1192                     I915_WRITE(GEN9_GAMT_ECO_REG_RW_IA,
1193                                  (I915_READ(GEN9_GAMT_ECO_REG_RW_IA) |
1194                                   GAMT_ECO_ENABLE_IN_PLACE_DECOMPRESS));
1195 
1196           /* WaDisableLSQCROPERFforOCL:skl */
1197           ret = wa_ring_whitelist_reg(engine, GEN8_L3SQCREG4);
1198           if (ret)
1199                     return ret;
1200 
1201           return skl_tune_iz_hashing(engine);
1202 }
1203 
bxt_init_workarounds(struct intel_engine_cs * engine)1204 static int bxt_init_workarounds(struct intel_engine_cs *engine)
1205 {
1206           struct drm_i915_private *dev_priv = engine->i915;
1207           int ret;
1208 
1209           ret = gen9_init_workarounds(engine);
1210           if (ret)
1211                     return ret;
1212 
1213           /* WaStoreMultiplePTEenable:bxt */
1214           /* This is a requirement according to Hardware specification */
1215           if (IS_BXT_REVID(dev_priv, 0, BXT_REVID_A1))
1216                     I915_WRITE(TILECTL, I915_READ(TILECTL) | TILECTL_TLBPF);
1217 
1218           /* WaSetClckGatingDisableMedia:bxt */
1219           if (IS_BXT_REVID(dev_priv, 0, BXT_REVID_A1)) {
1220                     I915_WRITE(GEN7_MISCCPCTL, (I915_READ(GEN7_MISCCPCTL) &
1221                                                       ~GEN8_DOP_CLOCK_GATE_MEDIA_ENABLE));
1222           }
1223 
1224           /* WaDisableThreadStallDopClockGating:bxt */
1225           WA_SET_BIT_MASKED(GEN8_ROW_CHICKEN,
1226                                 STALL_DOP_GATING_DISABLE);
1227 
1228           /* WaDisablePooledEuLoadBalancingFix:bxt */
1229           if (IS_BXT_REVID(dev_priv, BXT_REVID_B0, REVID_FOREVER)) {
1230                     I915_WRITE(FF_SLICE_CS_CHICKEN2,
1231                                  _MASKED_BIT_ENABLE(GEN9_POOLED_EU_LOAD_BALANCING_FIX_DISABLE));
1232           }
1233 
1234           /* WaDisableSbeCacheDispatchPortSharing:bxt */
1235           if (IS_BXT_REVID(dev_priv, 0, BXT_REVID_B0)) {
1236                     WA_SET_BIT_MASKED(
1237                               GEN7_HALF_SLICE_CHICKEN1,
1238                               GEN7_SBE_SS_CACHE_DISPATCH_PORT_SHARING_DISABLE);
1239           }
1240 
1241           /* WaDisableObjectLevelPreemptionForTrifanOrPolygon:bxt */
1242           /* WaDisableObjectLevelPreemptionForInstancedDraw:bxt */
1243           /* WaDisableObjectLevelPreemtionForInstanceId:bxt */
1244           /* WaDisableLSQCROPERFforOCL:bxt */
1245           if (IS_BXT_REVID(dev_priv, 0, BXT_REVID_A1)) {
1246                     ret = wa_ring_whitelist_reg(engine, GEN9_CS_DEBUG_MODE1);
1247                     if (ret)
1248                               return ret;
1249 
1250                     ret = wa_ring_whitelist_reg(engine, GEN8_L3SQCREG4);
1251                     if (ret)
1252                               return ret;
1253           }
1254 
1255           /* WaProgramL3SqcReg1DefaultForPerf:bxt */
1256           if (IS_BXT_REVID(dev_priv, BXT_REVID_B0, REVID_FOREVER)) {
1257                     u32 val = I915_READ(GEN8_L3SQCREG1);
1258                     val &= ~L3_PRIO_CREDITS_MASK;
1259                     val |= L3_GENERAL_PRIO_CREDITS(62) | L3_HIGH_PRIO_CREDITS(2);
1260                     I915_WRITE(GEN8_L3SQCREG1, val);
1261           }
1262 
1263           /* WaToEnableHwFixForPushConstHWBug:bxt */
1264           if (IS_BXT_REVID(dev_priv, BXT_REVID_C0, REVID_FOREVER))
1265                     WA_SET_BIT_MASKED(COMMON_SLICE_CHICKEN2,
1266                                           GEN8_SBE_DISABLE_REPLAY_BUF_OPTIMIZATION);
1267 
1268           /* WaInPlaceDecompressionHang:bxt */
1269           if (IS_BXT_REVID(dev_priv, BXT_REVID_C0, REVID_FOREVER))
1270                     I915_WRITE(GEN9_GAMT_ECO_REG_RW_IA,
1271                                  (I915_READ(GEN9_GAMT_ECO_REG_RW_IA) |
1272                                   GAMT_ECO_ENABLE_IN_PLACE_DECOMPRESS));
1273 
1274           return 0;
1275 }
1276 
cnl_init_workarounds(struct intel_engine_cs * engine)1277 static int cnl_init_workarounds(struct intel_engine_cs *engine)
1278 {
1279           struct drm_i915_private *dev_priv = engine->i915;
1280           int ret;
1281 
1282           /* WaDisableI2mCycleOnWRPort:cnl (pre-prod) */
1283           if (IS_CNL_REVID(dev_priv, CNL_REVID_B0, CNL_REVID_B0))
1284                     I915_WRITE(GAMT_CHKN_BIT_REG,
1285                                  (I915_READ(GAMT_CHKN_BIT_REG) |
1286                                   GAMT_CHKN_DISABLE_I2M_CYCLE_ON_WR_PORT));
1287 
1288           /* WaForceContextSaveRestoreNonCoherent:cnl */
1289           WA_SET_BIT_MASKED(CNL_HDC_CHICKEN0,
1290                                 HDC_FORCE_CONTEXT_SAVE_RESTORE_NON_COHERENT);
1291 
1292           /* WaThrottleEUPerfToAvoidTDBackPressure:cnl(pre-prod) */
1293           if (IS_CNL_REVID(dev_priv, CNL_REVID_B0, CNL_REVID_B0))
1294                     WA_SET_BIT_MASKED(GEN8_ROW_CHICKEN, THROTTLE_12_5);
1295 
1296           /* WaDisableReplayBufferBankArbitrationOptimization:cnl */
1297           WA_SET_BIT_MASKED(COMMON_SLICE_CHICKEN2,
1298                                 GEN8_SBE_DISABLE_REPLAY_BUF_OPTIMIZATION);
1299 
1300           /* WaDisableEnhancedSBEVertexCaching:cnl (pre-prod) */
1301           if (IS_CNL_REVID(dev_priv, 0, CNL_REVID_B0))
1302                     WA_SET_BIT_MASKED(COMMON_SLICE_CHICKEN2,
1303                                           GEN8_CSC2_SBE_VUE_CACHE_CONSERVATIVE);
1304 
1305           /* WaInPlaceDecompressionHang:cnl */
1306           I915_WRITE(GEN9_GAMT_ECO_REG_RW_IA,
1307                        (I915_READ(GEN9_GAMT_ECO_REG_RW_IA) |
1308                         GAMT_ECO_ENABLE_IN_PLACE_DECOMPRESS));
1309 
1310           /* WaPushConstantDereferenceHoldDisable:cnl */
1311           WA_SET_BIT_MASKED(GEN7_ROW_CHICKEN2, PUSH_CONSTANT_DEREF_DISABLE);
1312 
1313           /* FtrEnableFastAnisoL1BankingFix: cnl */
1314           WA_SET_BIT_MASKED(HALF_SLICE_CHICKEN3, CNL_FAST_ANISO_L1_BANKING_FIX);
1315 
1316           /* WaDisable3DMidCmdPreemption:cnl */
1317           WA_CLR_BIT_MASKED(GEN8_CS_CHICKEN1, GEN9_PREEMPT_3D_OBJECT_LEVEL);
1318 
1319           /* WaDisableGPGPUMidCmdPreemption:cnl */
1320           WA_SET_FIELD_MASKED(GEN8_CS_CHICKEN1, GEN9_PREEMPT_GPGPU_LEVEL_MASK,
1321                                   GEN9_PREEMPT_GPGPU_COMMAND_LEVEL);
1322 
1323           /* WaEnablePreemptionGranularityControlByUMD:cnl */
1324           I915_WRITE(GEN7_FF_SLICE_CS_CHICKEN1,
1325                        _MASKED_BIT_ENABLE(GEN9_FFSC_PERCTX_PREEMPT_CTRL));
1326           ret= wa_ring_whitelist_reg(engine, GEN8_CS_CHICKEN1);
1327           if (ret)
1328                     return ret;
1329 
1330           return 0;
1331 }
1332 
kbl_init_workarounds(struct intel_engine_cs * engine)1333 static int kbl_init_workarounds(struct intel_engine_cs *engine)
1334 {
1335           struct drm_i915_private *dev_priv = engine->i915;
1336           int ret;
1337 
1338           ret = gen9_init_workarounds(engine);
1339           if (ret)
1340                     return ret;
1341 
1342           /* WaEnableGapsTsvCreditFix:kbl */
1343           I915_WRITE(GEN8_GARBCNTL, (I915_READ(GEN8_GARBCNTL) |
1344                                            GEN9_GAPS_TSV_CREDIT_DISABLE));
1345 
1346           /* WaDisableDynamicCreditSharing:kbl */
1347           if (IS_KBL_REVID(dev_priv, 0, KBL_REVID_B0))
1348                     I915_WRITE(GAMT_CHKN_BIT_REG,
1349                                  (I915_READ(GAMT_CHKN_BIT_REG) |
1350                                   GAMT_CHKN_DISABLE_DYNAMIC_CREDIT_SHARING));
1351 
1352           /* WaDisableFenceDestinationToSLM:kbl (pre-prod) */
1353           if (IS_KBL_REVID(dev_priv, KBL_REVID_A0, KBL_REVID_A0))
1354                     WA_SET_BIT_MASKED(HDC_CHICKEN0,
1355                                           HDC_FENCE_DEST_SLM_DISABLE);
1356 
1357           /* WaToEnableHwFixForPushConstHWBug:kbl */
1358           if (IS_KBL_REVID(dev_priv, KBL_REVID_C0, REVID_FOREVER))
1359                     WA_SET_BIT_MASKED(COMMON_SLICE_CHICKEN2,
1360                                           GEN8_SBE_DISABLE_REPLAY_BUF_OPTIMIZATION);
1361 
1362           /* WaDisableGafsUnitClkGating:kbl */
1363           I915_WRITE(GEN7_UCGCTL4, (I915_READ(GEN7_UCGCTL4) |
1364                                           GEN8_EU_GAUNIT_CLOCK_GATE_DISABLE));
1365 
1366           /* WaDisableSbeCacheDispatchPortSharing:kbl */
1367           WA_SET_BIT_MASKED(
1368                     GEN7_HALF_SLICE_CHICKEN1,
1369                     GEN7_SBE_SS_CACHE_DISPATCH_PORT_SHARING_DISABLE);
1370 
1371           /* WaInPlaceDecompressionHang:kbl */
1372           I915_WRITE(GEN9_GAMT_ECO_REG_RW_IA,
1373                        (I915_READ(GEN9_GAMT_ECO_REG_RW_IA) |
1374                         GAMT_ECO_ENABLE_IN_PLACE_DECOMPRESS));
1375 
1376           /* WaDisableLSQCROPERFforOCL:kbl */
1377           ret = wa_ring_whitelist_reg(engine, GEN8_L3SQCREG4);
1378           if (ret)
1379                     return ret;
1380 
1381           return 0;
1382 }
1383 
glk_init_workarounds(struct intel_engine_cs * engine)1384 static int glk_init_workarounds(struct intel_engine_cs *engine)
1385 {
1386           struct drm_i915_private *dev_priv = engine->i915;
1387           int ret;
1388 
1389           ret = gen9_init_workarounds(engine);
1390           if (ret)
1391                     return ret;
1392 
1393           /* WA #0862: Userspace has to set "Barrier Mode" to avoid hangs. */
1394           ret = wa_ring_whitelist_reg(engine, GEN9_SLICE_COMMON_ECO_CHICKEN1);
1395           if (ret)
1396                     return ret;
1397 
1398           /* WaToEnableHwFixForPushConstHWBug:glk */
1399           WA_SET_BIT_MASKED(COMMON_SLICE_CHICKEN2,
1400                                 GEN8_SBE_DISABLE_REPLAY_BUF_OPTIMIZATION);
1401 
1402           return 0;
1403 }
1404 
cfl_init_workarounds(struct intel_engine_cs * engine)1405 static int cfl_init_workarounds(struct intel_engine_cs *engine)
1406 {
1407           struct drm_i915_private *dev_priv = engine->i915;
1408           int ret;
1409 
1410           ret = gen9_init_workarounds(engine);
1411           if (ret)
1412                     return ret;
1413 
1414           /* WaEnableGapsTsvCreditFix:cfl */
1415           I915_WRITE(GEN8_GARBCNTL, (I915_READ(GEN8_GARBCNTL) |
1416                                            GEN9_GAPS_TSV_CREDIT_DISABLE));
1417 
1418           /* WaToEnableHwFixForPushConstHWBug:cfl */
1419           WA_SET_BIT_MASKED(COMMON_SLICE_CHICKEN2,
1420                                 GEN8_SBE_DISABLE_REPLAY_BUF_OPTIMIZATION);
1421 
1422           /* WaDisableGafsUnitClkGating:cfl */
1423           I915_WRITE(GEN7_UCGCTL4, (I915_READ(GEN7_UCGCTL4) |
1424                                           GEN8_EU_GAUNIT_CLOCK_GATE_DISABLE));
1425 
1426           /* WaDisableSbeCacheDispatchPortSharing:cfl */
1427           WA_SET_BIT_MASKED(
1428                     GEN7_HALF_SLICE_CHICKEN1,
1429                     GEN7_SBE_SS_CACHE_DISPATCH_PORT_SHARING_DISABLE);
1430 
1431           /* WaInPlaceDecompressionHang:cfl */
1432           I915_WRITE(GEN9_GAMT_ECO_REG_RW_IA,
1433                        (I915_READ(GEN9_GAMT_ECO_REG_RW_IA) |
1434                         GAMT_ECO_ENABLE_IN_PLACE_DECOMPRESS));
1435 
1436           return 0;
1437 }
1438 
init_workarounds_ring(struct intel_engine_cs * engine)1439 int init_workarounds_ring(struct intel_engine_cs *engine)
1440 {
1441           struct drm_i915_private *dev_priv = engine->i915;
1442           int err;
1443 
1444           WARN_ON(engine->id != RCS);
1445 
1446           dev_priv->workarounds.count = 0;
1447           dev_priv->workarounds.hw_whitelist_count[engine->id] = 0;
1448 
1449           if (IS_BROADWELL(dev_priv))
1450                     err = bdw_init_workarounds(engine);
1451           else if (IS_CHERRYVIEW(dev_priv))
1452                     err = chv_init_workarounds(engine);
1453           else if (IS_SKYLAKE(dev_priv))
1454                     err =  skl_init_workarounds(engine);
1455           else if (IS_BROXTON(dev_priv))
1456                     err = bxt_init_workarounds(engine);
1457           else if (IS_KABYLAKE(dev_priv))
1458                     err = kbl_init_workarounds(engine);
1459           else if (IS_GEMINILAKE(dev_priv))
1460                     err =  glk_init_workarounds(engine);
1461           else if (IS_COFFEELAKE(dev_priv))
1462                     err = cfl_init_workarounds(engine);
1463           else if (IS_CANNONLAKE(dev_priv))
1464                     err = cnl_init_workarounds(engine);
1465           else
1466                     err = 0;
1467           if (err)
1468                     return err;
1469 
1470           DRM_DEBUG_DRIVER("%s: Number of context specific w/a: %d\n",
1471                                engine->name, dev_priv->workarounds.count);
1472           return 0;
1473 }
1474 
intel_ring_workarounds_emit(struct drm_i915_gem_request * req)1475 int intel_ring_workarounds_emit(struct drm_i915_gem_request *req)
1476 {
1477           struct i915_workarounds *w = &req->i915->workarounds;
1478           u32 *cs;
1479           int ret, i;
1480 
1481           if (w->count == 0)
1482                     return 0;
1483 
1484           ret = req->engine->emit_flush(req, EMIT_BARRIER);
1485           if (ret)
1486                     return ret;
1487 
1488           cs = intel_ring_begin(req, (w->count * 2 + 2));
1489           if (IS_ERR(cs))
1490                     return PTR_ERR(cs);
1491 
1492           *cs++ = MI_LOAD_REGISTER_IMM(w->count);
1493           for (i = 0; i < w->count; i++) {
1494                     *cs++ = i915_mmio_reg_offset(w->reg[i].addr);
1495                     *cs++ = w->reg[i].value;
1496           }
1497           *cs++ = MI_NOOP;
1498 
1499           intel_ring_advance(req, cs);
1500 
1501           ret = req->engine->emit_flush(req, EMIT_BARRIER);
1502           if (ret)
1503                     return ret;
1504 
1505           return 0;
1506 }
1507 
ring_is_idle(struct intel_engine_cs * engine)1508 static bool ring_is_idle(struct intel_engine_cs *engine)
1509 {
1510           struct drm_i915_private *dev_priv = engine->i915;
1511           bool idle = true;
1512 
1513           intel_runtime_pm_get(dev_priv);
1514 
1515           /* First check that no commands are left in the ring */
1516           if ((I915_READ_HEAD(engine) & HEAD_ADDR) !=
1517               (I915_READ_TAIL(engine) & TAIL_ADDR))
1518                     idle = false;
1519 
1520           /* No bit for gen2, so assume the CS parser is idle */
1521           if (INTEL_GEN(dev_priv) > 2 && !(I915_READ_MODE(engine) & MODE_IDLE))
1522                     idle = false;
1523 
1524           intel_runtime_pm_put(dev_priv);
1525 
1526           return idle;
1527 }
1528 
1529 /**
1530  * intel_engine_is_idle() - Report if the engine has finished process all work
1531  * @engine: the intel_engine_cs
1532  *
1533  * Return true if there are no requests pending, nothing left to be submitted
1534  * to hardware, and that the engine is idle.
1535  */
intel_engine_is_idle(struct intel_engine_cs * engine)1536 bool intel_engine_is_idle(struct intel_engine_cs *engine)
1537 {
1538           struct drm_i915_private *dev_priv = engine->i915;
1539 
1540           /* More white lies, if wedged, hw state is inconsistent */
1541           if (i915_terminally_wedged(&dev_priv->gpu_error))
1542                     return true;
1543 
1544           /* Any inflight/incomplete requests? */
1545           if (!i915_seqno_passed(intel_engine_get_seqno(engine),
1546                                      intel_engine_last_submit(engine)))
1547                     return false;
1548 
1549           if (I915_SELFTEST_ONLY(engine->breadcrumbs.mock))
1550                     return true;
1551 
1552           /* Interrupt/tasklet pending? */
1553           if (test_bit(ENGINE_IRQ_EXECLIST, &engine->irq_posted))
1554                     return false;
1555 
1556           /* Waiting to drain ELSP? */
1557           if (READ_ONCE(engine->execlists.active))
1558                     return false;
1559 
1560           /* ELSP is empty, but there are ready requests? */
1561           if (READ_ONCE(engine->execlists.first))
1562                     return false;
1563 
1564           /* Ring stopped? */
1565           if (!ring_is_idle(engine))
1566                     return false;
1567 
1568           return true;
1569 }
1570 
intel_engines_are_idle(struct drm_i915_private * dev_priv)1571 bool intel_engines_are_idle(struct drm_i915_private *dev_priv)
1572 {
1573           struct intel_engine_cs *engine;
1574           enum intel_engine_id id;
1575 
1576           if (READ_ONCE(dev_priv->gt.active_requests))
1577                     return false;
1578 
1579           /* If the driver is wedged, HW state may be very inconsistent and
1580            * report that it is still busy, even though we have stopped using it.
1581            */
1582           if (i915_terminally_wedged(&dev_priv->gpu_error))
1583                     return true;
1584 
1585           for_each_engine(engine, dev_priv, id) {
1586                     if (!intel_engine_is_idle(engine))
1587                               return false;
1588           }
1589 
1590           return true;
1591 }
1592 
intel_engines_reset_default_submission(struct drm_i915_private * i915)1593 void intel_engines_reset_default_submission(struct drm_i915_private *i915)
1594 {
1595           struct intel_engine_cs *engine;
1596           enum intel_engine_id id;
1597 
1598           for_each_engine(engine, i915, id)
1599                     engine->set_default_submission(engine);
1600 }
1601 
intel_engines_mark_idle(struct drm_i915_private * i915)1602 void intel_engines_mark_idle(struct drm_i915_private *i915)
1603 {
1604           struct intel_engine_cs *engine;
1605           enum intel_engine_id id;
1606 
1607           for_each_engine(engine, i915, id) {
1608                     intel_engine_disarm_breadcrumbs(engine);
1609                     i915_gem_batch_pool_fini(&engine->batch_pool);
1610                     tasklet_kill(&engine->execlists.irq_tasklet);
1611                     engine->execlists.no_priolist = false;
1612           }
1613 }
1614 
intel_engine_can_store_dword(struct intel_engine_cs * engine)1615 bool intel_engine_can_store_dword(struct intel_engine_cs *engine)
1616 {
1617           switch (INTEL_GEN(engine->i915)) {
1618           case 2:
1619                     return false; /* uses physical not virtual addresses */
1620           case 3:
1621                     /* maybe only uses physical not virtual addresses */
1622                     return !(IS_I915G(engine->i915) || IS_I915GM(engine->i915));
1623           case 6:
1624                     return engine->class != VIDEO_DECODE_CLASS; /* b0rked */
1625           default:
1626                     return true;
1627           }
1628 }
1629 
print_request(struct drm_printer * m,struct drm_i915_gem_request * rq,const char * prefix)1630 static void print_request(struct drm_printer *m,
1631                                 struct drm_i915_gem_request *rq,
1632                                 const char *prefix)
1633 {
1634           drm_printf(m, "%s%x%s [%x:%x] prio=%d @ %ldms: %s\n", prefix,
1635                        rq->global_seqno,
1636                        i915_gem_request_completed(rq) ? "!" : "",
1637                        rq->ctx->hw_id, rq->fence.seqno,
1638                        rq->priotree.priority,
1639                        jiffies_to_msecs(jiffies - rq->emitted_jiffies),
1640                        rq->timeline->common->name);
1641 }
1642 
intel_engine_dump(struct intel_engine_cs * engine,struct drm_printer * m)1643 void intel_engine_dump(struct intel_engine_cs *engine, struct drm_printer *m)
1644 {
1645           struct intel_breadcrumbs * const b = &engine->breadcrumbs;
1646           const struct intel_engine_execlists * const execlists = &engine->execlists;
1647           struct i915_gpu_error * const error = &engine->i915->gpu_error;
1648           struct drm_i915_private *dev_priv = engine->i915;
1649           struct drm_i915_gem_request *rq;
1650           struct rb_node *rb;
1651           u64 addr;
1652 
1653           drm_printf(m, "%s\n", engine->name);
1654           drm_printf(m, "\tcurrent seqno %x, last %x, hangcheck %x [%ld ms], inflight %d\n",
1655                        intel_engine_get_seqno(engine),
1656                        intel_engine_last_submit(engine),
1657                        engine->hangcheck.seqno,
1658                        jiffies_to_msecs(jiffies - engine->hangcheck.action_timestamp),
1659                        engine->timeline->inflight_seqnos);
1660           drm_printf(m, "\tReset count: %d\n",
1661                        i915_reset_engine_count(error, engine));
1662 
1663           rcu_read_lock();
1664 
1665           drm_printf(m, "\tRequests:\n");
1666 
1667           rq = list_first_entry(&engine->timeline->requests,
1668                                     struct drm_i915_gem_request, link);
1669           if (&rq->link != &engine->timeline->requests)
1670                     print_request(m, rq, "\t\tfirst  ");
1671 
1672           rq = list_last_entry(&engine->timeline->requests,
1673                                    struct drm_i915_gem_request, link);
1674           if (&rq->link != &engine->timeline->requests)
1675                     print_request(m, rq, "\t\tlast   ");
1676 
1677           rq = i915_gem_find_active_request(engine);
1678           if (rq) {
1679                     print_request(m, rq, "\t\tactive ");
1680                     drm_printf(m,
1681                                  "\t\t[head %04x, postfix %04x, tail %04x, batch 0x%08x_%08x]\n",
1682                                  rq->head, rq->postfix, rq->tail,
1683                                  rq->batch ? upper_32_bits(rq->batch->node.start) : ~0u,
1684                                  rq->batch ? lower_32_bits(rq->batch->node.start) : ~0u);
1685           }
1686 
1687           drm_printf(m, "\tRING_START: 0x%08x [0x%08x]\n",
1688                        I915_READ(RING_START(engine->mmio_base)),
1689                        rq ? i915_ggtt_offset(rq->ring->vma) : 0);
1690           drm_printf(m, "\tRING_HEAD:  0x%08x [0x%08x]\n",
1691                        I915_READ(RING_HEAD(engine->mmio_base)) & HEAD_ADDR,
1692                        rq ? rq->ring->head : 0);
1693           drm_printf(m, "\tRING_TAIL:  0x%08x [0x%08x]\n",
1694                        I915_READ(RING_TAIL(engine->mmio_base)) & TAIL_ADDR,
1695                        rq ? rq->ring->tail : 0);
1696           drm_printf(m, "\tRING_CTL:   0x%08x [%s]\n",
1697                        I915_READ(RING_CTL(engine->mmio_base)),
1698                        I915_READ(RING_CTL(engine->mmio_base)) & (RING_WAIT | RING_WAIT_SEMAPHORE) ? "waiting" : "");
1699 
1700           rcu_read_unlock();
1701 
1702           addr = intel_engine_get_active_head(engine);
1703           drm_printf(m, "\tACTHD:  0x%08x_%08x\n",
1704                        upper_32_bits(addr), lower_32_bits(addr));
1705           addr = intel_engine_get_last_batch_head(engine);
1706           drm_printf(m, "\tBBADDR: 0x%08x_%08x\n",
1707                        upper_32_bits(addr), lower_32_bits(addr));
1708 
1709           if (i915_modparams.enable_execlists) {
1710                     const u32 *hws = &engine->status_page.page_addr[I915_HWS_CSB_BUF0_INDEX];
1711                     u32 ptr, read, write;
1712                     unsigned int idx;
1713 
1714                     drm_printf(m, "\tExeclist status: 0x%08x %08x\n",
1715                                  I915_READ(RING_EXECLIST_STATUS_LO(engine)),
1716                                  I915_READ(RING_EXECLIST_STATUS_HI(engine)));
1717 
1718                     ptr = I915_READ(RING_CONTEXT_STATUS_PTR(engine));
1719                     read = GEN8_CSB_READ_PTR(ptr);
1720                     write = GEN8_CSB_WRITE_PTR(ptr);
1721                     drm_printf(m, "\tExeclist CSB read %d [%d cached], write %d [%d from hws], interrupt posted? %s\n",
1722                                  read, execlists->csb_head,
1723                                  write,
1724                                  intel_read_status_page(engine, intel_hws_csb_write_index(engine->i915)),
1725                                  yesno(test_bit(ENGINE_IRQ_EXECLIST,
1726                                                     &engine->irq_posted)));
1727                     if (read >= GEN8_CSB_ENTRIES)
1728                               read = 0;
1729                     if (write >= GEN8_CSB_ENTRIES)
1730                               write = 0;
1731                     if (read > write)
1732                               write += GEN8_CSB_ENTRIES;
1733                     while (read < write) {
1734                               idx = ++read % GEN8_CSB_ENTRIES;
1735                               drm_printf(m, "\tExeclist CSB[%d]: 0x%08x [0x%08x in hwsp], context: %d [%d in hwsp]\n",
1736                                            idx,
1737                                            I915_READ(RING_CONTEXT_STATUS_BUF_LO(engine, idx)),
1738                                            hws[idx * 2],
1739                                            I915_READ(RING_CONTEXT_STATUS_BUF_HI(engine, idx)),
1740                                            hws[idx * 2 + 1]);
1741                     }
1742 
1743                     rcu_read_lock();
1744                     for (idx = 0; idx < execlists_num_ports(execlists); idx++) {
1745                               unsigned int count;
1746 
1747                               rq = port_unpack(&execlists->port[idx], &count);
1748                               if (rq) {
1749                                         drm_printf(m, "\t\tELSP[%d] count=%d, ",
1750                                                      idx, count);
1751                                         print_request(m, rq, "rq: ");
1752                               } else {
1753                                         drm_printf(m, "\t\tELSP[%d] idle\n",
1754                                                      idx);
1755                               }
1756                     }
1757                     drm_printf(m, "\t\tHW active? 0x%x\n", execlists->active);
1758                     rcu_read_unlock();
1759           } else if (INTEL_GEN(dev_priv) > 6) {
1760                     drm_printf(m, "\tPP_DIR_BASE: 0x%08x\n",
1761                                  I915_READ(RING_PP_DIR_BASE(engine)));
1762                     drm_printf(m, "\tPP_DIR_BASE_READ: 0x%08x\n",
1763                                  I915_READ(RING_PP_DIR_BASE_READ(engine)));
1764                     drm_printf(m, "\tPP_DIR_DCLV: 0x%08x\n",
1765                                  I915_READ(RING_PP_DIR_DCLV(engine)));
1766           }
1767 
1768           spin_lock_irq(&engine->timeline->lock);
1769           list_for_each_entry(rq, &engine->timeline->requests, link)
1770                     print_request(m, rq, "\t\tE ");
1771           for (rb = execlists->first; rb; rb = rb_next(rb)) {
1772                     struct i915_priolist *p =
1773                               rb_entry(rb, typeof(*p), node);
1774 
1775                     list_for_each_entry(rq, &p->requests, priotree.link)
1776                               print_request(m, rq, "\t\tQ ");
1777           }
1778           spin_unlock_irq(&engine->timeline->lock);
1779 
1780           spin_lock_irq(&b->rb_lock);
1781           for (rb = rb_first(&b->waiters); rb; rb = rb_next(rb)) {
1782                     struct intel_wait *w = rb_entry(rb, typeof(*w), node);
1783 
1784                     drm_printf(m, "\t%s [%d] waiting for %x\n",
1785                                  w->tsk->comm, w->tsk->pid, w->seqno);
1786           }
1787           spin_unlock_irq(&b->rb_lock);
1788 
1789           drm_printf(m, "\n");
1790 }
1791 
1792 #if IS_ENABLED(CONFIG_DRM_I915_SELFTEST)
1793 #include "selftests/mock_engine.c"
1794 #endif
1795