xref: /dragonfly/sys/dev/drm/i915/i915_gem_gtt.c (revision 3f2dd94a569761201b5b0a18b2f697f97fe1b9dc)
1 /*
2  * Copyright © 2010 Daniel Vetter
3  * Copyright © 2011-2014 Intel Corporation
4  *
5  * Permission is hereby granted, free of charge, to any person obtaining a
6  * copy of this software and associated documentation files (the "Software"),
7  * to deal in the Software without restriction, including without limitation
8  * the rights to use, copy, modify, merge, publish, distribute, sublicense,
9  * and/or sell copies of the Software, and to permit persons to whom the
10  * Software is furnished to do so, subject to the following conditions:
11  *
12  * The above copyright notice and this permission notice (including the next
13  * paragraph) shall be included in all copies or substantial portions of the
14  * Software.
15  *
16  * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
17  * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
18  * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT.  IN NO EVENT SHALL
19  * THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
20  * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING
21  * FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS
22  * IN THE SOFTWARE.
23  *
24  */
25 
26 #include <linux/slab.h> /* fault-inject.h is not standalone! */
27 
28 #include <linux/fault-inject.h>
29 #include <linux/log2.h>
30 #include <linux/random.h>
31 #include <linux/seq_file.h>
32 #include <linux/stop_machine.h>
33 
34 #include <asm/set_memory.h>
35 
36 #include <drm/drmP.h>
37 #include <drm/i915_drm.h>
38 
39 #include "i915_drv.h"
40 #include "i915_vgpu.h"
41 #include "i915_trace.h"
42 #include "intel_drv.h"
43 #include "intel_frontbuffer.h"
44 
/* Allocation flags handed to __setup_page_dma() by setup_page_dma(). */
#define I915_GFP_DMA (GFP_KERNEL | __GFP_HIGHMEM)
46 
47 /**
48  * DOC: Global GTT views
49  *
50  * Background and previous state
51  *
52  * Historically objects could exist (be bound) in global GTT space only as
53  * singular instances with a view representing all of the object's backing pages
54  * in a linear fashion. This view will be called a normal view.
55  *
56  * To support multiple views of the same object, where the number of mapped
57  * pages is not equal to the backing store, or where the layout of the pages
58  * is not linear, the concept of a GGTT view was added.
59  *
60  * One example of an alternative view is a stereo display driven by a single
61  * image. In this case we would have a framebuffer looking like this
62  * (2x2 pages):
63  *
64  *    12
65  *    34
66  *
67  * Above would represent a normal GGTT view as normally mapped for GPU or CPU
68  * rendering. In contrast, fed to the display engine would be an alternative
69  * view which could look something like this:
70  *
71  *   1212
72  *   3434
73  *
74  * In this example both the size and layout of pages in the alternative view is
75  * different from the normal view.
76  *
77  * Implementation and usage
78  *
79  * GGTT views are implemented using VMAs and are distinguished via enum
80  * i915_ggtt_view_type and struct i915_ggtt_view.
81  *
82  * A new flavour of core GEM functions which work with GGTT bound objects was
83  * added with the _ggtt_ infix, and sometimes with _view postfix to avoid
84  * renaming in large amounts of code. They take the struct i915_ggtt_view
85  * parameter encapsulating all metadata required to implement a view.
86  *
87  * As a helper for callers which are only interested in the normal view,
88  * globally const i915_ggtt_view_normal singleton instance exists. All old core
89  * GEM API functions, the ones not taking the view parameter, are operating on,
90  * or with the normal GGTT view.
91  *
92  * Code wanting to add or use a new GGTT view needs to:
93  *
94  * 1. Add a new enum with a suitable name.
95  * 2. Extend the metadata in the i915_ggtt_view structure if required.
96  * 3. Add support to i915_get_vma_pages().
97  *
98  * New views are required to build a scatter-gather table from within the
99  * i915_get_vma_pages function. This table is stored in the vma.ggtt_view and
100  * exists for the lifetime of a VMA.
101  *
102  * Core API is designed to have copy semantics which means that passed in
103  * struct i915_ggtt_view does not need to be persistent (left around after
104  * calling the core API functions).
105  *
106  */
107 
/* Forward declaration; the definition is not in this part of the file. */
static int i915_get_ggtt_vma_pages(struct i915_vma *vma);
110 
gen6_ggtt_invalidate(struct drm_i915_private * dev_priv)111 static void gen6_ggtt_invalidate(struct drm_i915_private *dev_priv)
112 {
113           /* Note that as an uncached mmio write, this should flush the
114            * WCB of the writes into the GGTT before it triggers the invalidate.
115            */
116           I915_WRITE(GFX_FLSH_CNTL_GEN6, GFX_FLSH_CNTL_EN);
117 }
118 
guc_ggtt_invalidate(struct drm_i915_private * dev_priv)119 static void guc_ggtt_invalidate(struct drm_i915_private *dev_priv)
120 {
121           gen6_ggtt_invalidate(dev_priv);
122           I915_WRITE(GEN8_GTCR, GEN8_GTCR_INVALIDATE);
123 }
124 
/*
 * GGTT invalidate variant that delegates to intel_gtt_chipset_flush().
 * @dev_priv is not used; it is only here so the signature matches the
 * other *_ggtt_invalidate() helpers.
 */
static void gmch_ggtt_invalidate(struct drm_i915_private *dev_priv)
{
	intel_gtt_chipset_flush();
}
129 
i915_ggtt_invalidate(struct drm_i915_private * i915)130 static inline void i915_ggtt_invalidate(struct drm_i915_private *i915)
131 {
132           i915->ggtt.invalidate(i915);
133 }
134 
intel_sanitize_enable_ppgtt(struct drm_i915_private * dev_priv,int enable_ppgtt)135 int intel_sanitize_enable_ppgtt(struct drm_i915_private *dev_priv,
136                                         int enable_ppgtt)
137 {
138           bool has_full_ppgtt;
139           bool has_full_48bit_ppgtt;
140 
141           if (!dev_priv->info.has_aliasing_ppgtt)
142                     return 0;
143 
144           has_full_ppgtt = dev_priv->info.has_full_ppgtt;
145           has_full_48bit_ppgtt = dev_priv->info.has_full_48bit_ppgtt;
146 
147           if (intel_vgpu_active(dev_priv)) {
148                     /* GVT-g has no support for 32bit ppgtt */
149                     has_full_ppgtt = false;
150                     has_full_48bit_ppgtt = intel_vgpu_has_full_48bit_ppgtt(dev_priv);
151           }
152 
153           /*
154            * We don't allow disabling PPGTT for gen9+ as it's a requirement for
155            * execlists, the sole mechanism available to submit work.
156            */
157           if (enable_ppgtt == 0 && INTEL_GEN(dev_priv) < 9)
158                     return 0;
159 
160           if (enable_ppgtt == 1)
161                     return 1;
162 
163           if (enable_ppgtt == 2 && has_full_ppgtt)
164                     return 2;
165 
166           if (enable_ppgtt == 3 && has_full_48bit_ppgtt)
167                     return 3;
168 
169           /* Disable ppgtt on SNB if VT-d is on. */
170           if (IS_GEN6(dev_priv) && intel_vtd_active()) {
171                     DRM_INFO("Disabling PPGTT because VT-d is on\n");
172                     return 0;
173           }
174 
175           /* Early VLV doesn't have this */
176           if (IS_VALLEYVIEW(dev_priv) && dev_priv->drm.pdev->revision < 0xb) {
177                     DRM_DEBUG_DRIVER("disabling PPGTT on pre-B3 step VLV\n");
178                     return 0;
179           }
180 
181           if (INTEL_GEN(dev_priv) >= 8 && i915_modparams.enable_execlists) {
182                     if (has_full_48bit_ppgtt)
183                               return 3;
184 
185                     if (has_full_ppgtt)
186                               return 2;
187           }
188 
189           return 1;
190 }
191 
/*
 * Bind @vma into its address space.
 *
 * Unless the vma already carries I915_VMA_LOCAL_BIND, the VA range
 * [node.start, node.start + size) is allocated first; then the entries are
 * written with @cache_level. The third argument is ignored.
 *
 * Returns 0 on success or the error from allocate_va_range().
 */
static int ppgtt_bind_vma(struct i915_vma *vma,
			  enum i915_cache_level cache_level,
			  u32 unused)
{
	struct i915_address_space *vm = vma->vm;
	u32 pte_flags = 0;

	if (!(vma->flags & I915_VMA_LOCAL_BIND)) {
		int err;

		err = vm->allocate_va_range(vm, vma->node.start, vma->size);
		if (err)
			return err;
	}

	/* Currently applicable only to VLV */
	if (vma->obj->gt_ro)
		pte_flags |= PTE_READ_ONLY;

	vm->insert_entries(vm, vma, cache_level, pte_flags);

	return 0;
}
215 
ppgtt_unbind_vma(struct i915_vma * vma)216 static void ppgtt_unbind_vma(struct i915_vma *vma)
217 {
218           vma->vm->clear_range(vma->vm, vma->node.start, vma->size);
219 }
220 
ppgtt_set_pages(struct i915_vma * vma)221 static int ppgtt_set_pages(struct i915_vma *vma)
222 {
223           GEM_BUG_ON(vma->pages);
224 
225           vma->pages = vma->obj->mm.pages;
226 
227           vma->page_sizes = vma->obj->mm.page_sizes;
228 
229           return 0;
230 }
231 
clear_pages(struct i915_vma * vma)232 static void clear_pages(struct i915_vma *vma)
233 {
234           GEM_BUG_ON(!vma->pages);
235 
236           if (vma->pages != vma->obj->mm.pages) {
237                     sg_free_table(vma->pages);
238                     kfree(vma->pages);
239           }
240           vma->pages = NULL;
241 
242           memset(&vma->page_sizes, 0, sizeof(vma->page_sizes));
243 }
244 
gen8_pte_encode(dma_addr_t addr,enum i915_cache_level level)245 static gen8_pte_t gen8_pte_encode(dma_addr_t addr,
246                                           enum i915_cache_level level)
247 {
248           gen8_pte_t pte = _PAGE_PRESENT | _PAGE_RW;
249           pte |= addr;
250 
251           switch (level) {
252           case I915_CACHE_NONE:
253                     pte |= PPAT_UNCACHED;
254                     break;
255           case I915_CACHE_WT:
256                     pte |= PPAT_DISPLAY_ELLC;
257                     break;
258           default:
259                     pte |= PPAT_CACHED;
260                     break;
261           }
262 
263           return pte;
264 }
265 
gen8_pde_encode(const dma_addr_t addr,const enum i915_cache_level level)266 static gen8_pde_t gen8_pde_encode(const dma_addr_t addr,
267                                           const enum i915_cache_level level)
268 {
269           gen8_pde_t pde = _PAGE_PRESENT | _PAGE_RW;
270           pde |= addr;
271           if (level != I915_CACHE_NONE)
272                     pde |= PPAT_CACHED_PDE;
273           else
274                     pde |= PPAT_UNCACHED;
275           return pde;
276 }
277 
/* PDP and PML4 entries are encoded exactly like PDEs. */
#define gen8_pdpe_encode	gen8_pde_encode
#define gen8_pml4e_encode	gen8_pde_encode
280 
snb_pte_encode(dma_addr_t addr,enum i915_cache_level level,u32 unused)281 static gen6_pte_t snb_pte_encode(dma_addr_t addr,
282                                          enum i915_cache_level level,
283                                          u32 unused)
284 {
285           gen6_pte_t pte = GEN6_PTE_VALID;
286           pte |= GEN6_PTE_ADDR_ENCODE(addr);
287 
288           switch (level) {
289           case I915_CACHE_L3_LLC:
290           case I915_CACHE_LLC:
291                     pte |= GEN6_PTE_CACHE_LLC;
292                     break;
293           case I915_CACHE_NONE:
294                     pte |= GEN6_PTE_UNCACHED;
295                     break;
296           default:
297                     MISSING_CASE(level);
298           }
299 
300           return pte;
301 }
302 
ivb_pte_encode(dma_addr_t addr,enum i915_cache_level level,u32 unused)303 static gen6_pte_t ivb_pte_encode(dma_addr_t addr,
304                                          enum i915_cache_level level,
305                                          u32 unused)
306 {
307           gen6_pte_t pte = GEN6_PTE_VALID;
308           pte |= GEN6_PTE_ADDR_ENCODE(addr);
309 
310           switch (level) {
311           case I915_CACHE_L3_LLC:
312                     pte |= GEN7_PTE_CACHE_L3_LLC;
313                     break;
314           case I915_CACHE_LLC:
315                     pte |= GEN6_PTE_CACHE_LLC;
316                     break;
317           case I915_CACHE_NONE:
318                     pte |= GEN6_PTE_UNCACHED;
319                     break;
320           default:
321                     MISSING_CASE(level);
322           }
323 
324           return pte;
325 }
326 
byt_pte_encode(dma_addr_t addr,enum i915_cache_level level,u32 flags)327 static gen6_pte_t byt_pte_encode(dma_addr_t addr,
328                                          enum i915_cache_level level,
329                                          u32 flags)
330 {
331           gen6_pte_t pte = GEN6_PTE_VALID;
332           pte |= GEN6_PTE_ADDR_ENCODE(addr);
333 
334           if (!(flags & PTE_READ_ONLY))
335                     pte |= BYT_PTE_WRITEABLE;
336 
337           if (level != I915_CACHE_NONE)
338                     pte |= BYT_PTE_SNOOPED_BY_CPU_CACHES;
339 
340           return pte;
341 }
342 
hsw_pte_encode(dma_addr_t addr,enum i915_cache_level level,u32 unused)343 static gen6_pte_t hsw_pte_encode(dma_addr_t addr,
344                                          enum i915_cache_level level,
345                                          u32 unused)
346 {
347           gen6_pte_t pte = GEN6_PTE_VALID;
348           pte |= HSW_PTE_ADDR_ENCODE(addr);
349 
350           if (level != I915_CACHE_NONE)
351                     pte |= HSW_WB_LLC_AGE3;
352 
353           return pte;
354 }
355 
iris_pte_encode(dma_addr_t addr,enum i915_cache_level level,u32 unused)356 static gen6_pte_t iris_pte_encode(dma_addr_t addr,
357                                           enum i915_cache_level level,
358                                           u32 unused)
359 {
360           gen6_pte_t pte = GEN6_PTE_VALID;
361           pte |= HSW_PTE_ADDR_ENCODE(addr);
362 
363           switch (level) {
364           case I915_CACHE_NONE:
365                     break;
366           case I915_CACHE_WT:
367                     pte |= HSW_WT_ELLC_LLC_AGE3;
368                     break;
369           default:
370                     pte |= HSW_WB_ELLC_LLC_AGE3;
371                     break;
372           }
373 
374           return pte;
375 }
376 
/*
 * Get one page for page-table use in @vm.
 *
 * Lookup order: the VM's own free_pages cache, then (only for VMs with
 * pt_kmap_wc set) the device-wide WC stash, and finally fresh allocation.
 * WC VMs refill the stash in a batch so that set_pages_array_wc() is
 * called once per batch instead of once per page.
 *
 * Returns the page, or NULL if nothing could be allocated.
 */
static struct page *vm_alloc_page(struct i915_address_space *vm, gfp_t gfp)
{
	struct pagevec *cache = &vm->free_pages;
	struct pagevec *stash;

	/* Selftest hook: on an injected fault, shrink everything first. */
	if (I915_SELFTEST_ONLY(should_fail(&vm->fault_attr, 1)))
		i915_gem_shrink_all(vm->i915);

	if (likely(cache->nr))
		return cache->pages[--cache->nr];

	if (!vm->pt_kmap_wc)
		return alloc_page(gfp);

	/* A placeholder for a specific mutex to guard the WC stash */
	lockdep_assert_held(&vm->i915->drm.struct_mutex);

	/* Look in our global stash of WC pages... */
	stash = &vm->i915->mm.wc_stash;
	if (likely(stash->nr))
		return stash->pages[--stash->nr];

	/* Otherwise batch allocate pages to amortize cost of set_pages_wc. */
	do {
		struct page *fresh = alloc_page(gfp);

		if (unlikely(!fresh))
			break;

		stash->pages[stash->nr++] = fresh;
	} while (pagevec_space(stash));

	if (unlikely(!stash->nr))
		return NULL;

	set_pages_array_wc(stash->pages, stash->nr);

	return stash->pages[--stash->nr];
}
416 
/*
 * Drain the VM's free_pages cache, which must not be empty.
 *
 * Non-WC VMs release every cached page straight away. WC VMs first move
 * pages into the device-wide WC stash while it has room; whatever is left
 * is switched back to WB and released, except that when some pages were
 * moved and @immediate is false the remainder stays cached.
 */
static void vm_free_pages_release(struct i915_address_space *vm,
				  bool immediate)
{
	struct pagevec *cache = &vm->free_pages;
	struct pagevec *stash;

	GEM_BUG_ON(!pagevec_count(cache));

	if (!vm->pt_kmap_wc) {
		__pagevec_release(cache);
		return;
	}

	/*
	 * When we use WC, first fill up the global stash and then
	 * only if full immediately free the overflow.
	 */
	stash = &vm->i915->mm.wc_stash;

	lockdep_assert_held(&vm->i915->drm.struct_mutex);
	if (pagevec_space(stash)) {
		do {
			stash->pages[stash->nr++] = cache->pages[--cache->nr];
			if (!cache->nr)
				return;
		} while (pagevec_space(stash));

		/*
		 * As we have made some room in the VM's free_pages,
		 * we can wait for it to fill again. Unless we are
		 * inside i915_address_space_fini() and must
		 * immediately release the pages!
		 */
		if (!immediate)
			return;
	}

	/* Pages leaving the WC pools must be turned back into WB first. */
	set_pages_array_wb(cache->pages, cache->nr);
	__pagevec_release(cache);
}
454 
vm_free_page(struct i915_address_space * vm,struct page * page)455 static void vm_free_page(struct i915_address_space *vm, struct page *page)
456 {
457           if (!pagevec_add(&vm->free_pages, page))
458                     vm_free_pages_release(vm, false);
459 }
460 
/*
 * Allocate a page for @p and DMA-map it bidirectionally.
 *
 * The allocation is made with @gfp plus __GFP_NOWARN | __GFP_NORETRY.
 * Returns 0 on success, or -ENOMEM if either the allocation or the
 * mapping fails (the page is handed back to the VM in the latter case).
 */
static int __setup_page_dma(struct i915_address_space *vm,
			    struct i915_page_dma *p,
			    gfp_t gfp)
{
	const gfp_t alloc_flags = gfp | __GFP_NOWARN | __GFP_NORETRY;

	p->page = vm_alloc_page(vm, alloc_flags);
	if (unlikely(!p->page))
		return -ENOMEM;

	p->daddr = dma_map_page(vm->dma, p->page, 0, PAGE_SIZE,
				PCI_DMA_BIDIRECTIONAL);
	if (likely(!dma_mapping_error(vm->dma, p->daddr)))
		return 0;

	/* Mapping failed: give the page back before reporting the error. */
	vm_free_page(vm, p->page);
	return -ENOMEM;
}
478 
setup_page_dma(struct i915_address_space * vm,struct i915_page_dma * p)479 static int setup_page_dma(struct i915_address_space *vm,
480                                 struct i915_page_dma *p)
481 {
482           return __setup_page_dma(vm, p, I915_GFP_DMA);
483 }
484 
cleanup_page_dma(struct i915_address_space * vm,struct i915_page_dma * p)485 static void cleanup_page_dma(struct i915_address_space *vm,
486                                    struct i915_page_dma *p)
487 {
488           dma_unmap_page(vm->dma, p->daddr, PAGE_SIZE, PCI_DMA_BIDIRECTIONAL);
489           vm_free_page(vm, p->page);
490 }
491 
/*
 * Convenience wrappers that apply the page_dma helpers to any page-table
 * structure "px" via px_base(px) (defined outside this part of the file).
 */

/* kmap_atomic() the backing page of px. */
#define kmap_atomic_px(px) kmap_atomic(px_base(px)->page)

#define setup_px(vm, px) setup_page_dma((vm), px_base(px))
#define cleanup_px(vm, px) cleanup_page_dma((vm), px_base(px))

/*
 * The first parameter is the address space. It used to be named "ppgtt"
 * while the expansion referenced "vm", so the argument was silently
 * dropped and "vm" was captured from the caller's scope. Naming the
 * parameter "vm" makes the macros use what they are given.
 */
#define fill_px(vm, px, v) fill_page_dma((vm), px_base(px), (v))
#define fill32_px(vm, px, v) fill_page_dma_32((vm), px_base(px), (v))
498 
fill_page_dma(struct i915_address_space * vm,struct i915_page_dma * p,const u64 val)499 static void fill_page_dma(struct i915_address_space *vm,
500                                 struct i915_page_dma *p,
501                                 const u64 val)
502 {
503           u64 * const vaddr = kmap_atomic(p->page);
504 
505           memset64(vaddr, val, PAGE_SIZE / sizeof(val));
506 
507           kunmap_atomic(vaddr);
508 }
509 
fill_page_dma_32(struct i915_address_space * vm,struct i915_page_dma * p,const u32 v)510 static void fill_page_dma_32(struct i915_address_space *vm,
511                                    struct i915_page_dma *p,
512                                    const u32 v)
513 {
514           fill_page_dma(vm, p, (u64)v << 32 | v);
515 }
516 
517 static int
setup_scratch_page(struct i915_address_space * vm,gfp_t gfp)518 setup_scratch_page(struct i915_address_space *vm, gfp_t gfp)
519 {
520           struct page *page = NULL;
521           dma_addr_t addr;
522           int order;
523 
524           /*
525            * In order to utilize 64K pages for an object with a size < 2M, we will
526            * need to support a 64K scratch page, given that every 16th entry for a
527            * page-table operating in 64K mode must point to a properly aligned 64K
528            * region, including any PTEs which happen to point to scratch.
529            *
530            * This is only relevant for the 48b PPGTT where we support
531            * huge-gtt-pages, see also i915_vma_insert().
532            *
533            * TODO: we should really consider write-protecting the scratch-page and
534            * sharing between ppgtt
535            */
536           if (i915_vm_is_48bit(vm) &&
537               HAS_PAGE_SIZES(vm->i915, I915_GTT_PAGE_SIZE_64K)) {
538                     order = get_order(I915_GTT_PAGE_SIZE_64K);
539                     page = alloc_pages(gfp | __GFP_ZERO | __GFP_NOWARN, order);
540                     if (page) {
541                               addr = dma_map_page(vm->dma, page, 0,
542                                                       I915_GTT_PAGE_SIZE_64K,
543                                                       PCI_DMA_BIDIRECTIONAL);
544                               if (unlikely(dma_mapping_error(vm->dma, addr))) {
545                                         __free_pages(page, order);
546                                         page = NULL;
547                               }
548 
549                               if (!IS_ALIGNED(addr, I915_GTT_PAGE_SIZE_64K)) {
550                                         dma_unmap_page(vm->dma, addr,
551                                                          I915_GTT_PAGE_SIZE_64K,
552                                                          PCI_DMA_BIDIRECTIONAL);
553                                         __free_pages(page, order);
554                                         page = NULL;
555                               }
556                     }
557           }
558 
559           if (!page) {
560                     order = 0;
561                     page = alloc_page(gfp | __GFP_ZERO);
562                     if (unlikely(!page))
563                               return -ENOMEM;
564 
565                     addr = dma_map_page(vm->dma, page, 0, PAGE_SIZE,
566                                             PCI_DMA_BIDIRECTIONAL);
567                     if (unlikely(dma_mapping_error(vm->dma, addr))) {
568                               __free_page(page);
569                               return -ENOMEM;
570                     }
571           }
572 
573           vm->scratch_page.page = page;
574           vm->scratch_page.daddr = addr;
575           vm->scratch_page.order = order;
576 
577           return 0;
578 }
579 
cleanup_scratch_page(struct i915_address_space * vm)580 static void cleanup_scratch_page(struct i915_address_space *vm)
581 {
582           struct i915_page_dma *p = &vm->scratch_page;
583 
584           dma_unmap_page(vm->dma, p->daddr, BIT(p->order) << PAGE_SHIFT,
585                            PCI_DMA_BIDIRECTIONAL);
586           __free_pages(p->page, p->order);
587 }
588 
alloc_pt(struct i915_address_space * vm)589 static struct i915_page_table *alloc_pt(struct i915_address_space *vm)
590 {
591           struct i915_page_table *pt;
592 
593           pt = kmalloc(sizeof(*pt), M_DRM, GFP_KERNEL | __GFP_NOWARN);
594           if (unlikely(!pt))
595                     return ERR_PTR(-ENOMEM);
596 
597           if (unlikely(setup_px(vm, pt))) {
598                     kfree(pt);
599                     return ERR_PTR(-ENOMEM);
600           }
601 
602           pt->used_ptes = 0;
603           return pt;
604 }
605 
/* Release the backing DMA page of @pt, then the descriptor itself. */
static void free_pt(struct i915_address_space *vm, struct i915_page_table *pt)
{
	cleanup_px(vm, pt);

	kfree(pt);
}
611 
gen8_initialize_pt(struct i915_address_space * vm,struct i915_page_table * pt)612 static void gen8_initialize_pt(struct i915_address_space *vm,
613                                      struct i915_page_table *pt)
614 {
615           fill_px(vm, pt,
616                     gen8_pte_encode(vm->scratch_page.daddr, I915_CACHE_LLC));
617 }
618 
gen6_initialize_pt(struct i915_address_space * vm,struct i915_page_table * pt)619 static void gen6_initialize_pt(struct i915_address_space *vm,
620                                      struct i915_page_table *pt)
621 {
622           fill32_px(vm, pt,
623                       vm->pte_encode(vm->scratch_page.daddr, I915_CACHE_LLC, 0));
624 }
625 
alloc_pd(struct i915_address_space * vm)626 static struct i915_page_directory *alloc_pd(struct i915_address_space *vm)
627 {
628           struct i915_page_directory *pd;
629 
630           pd = kzalloc(sizeof(*pd), GFP_KERNEL | __GFP_NOWARN);
631           if (unlikely(!pd))
632                     return ERR_PTR(-ENOMEM);
633 
634           if (unlikely(setup_px(vm, pd))) {
635                     kfree(pd);
636                     return ERR_PTR(-ENOMEM);
637           }
638 
639           pd->used_pdes = 0;
640           return pd;
641 }
642 
/* Release the backing DMA page of @pd, then the descriptor itself. */
static void free_pd(struct i915_address_space *vm,
		    struct i915_page_directory *pd)
{
	cleanup_px(vm, pd);

	kfree(pd);
}
649 
gen8_initialize_pd(struct i915_address_space * vm,struct i915_page_directory * pd)650 static void gen8_initialize_pd(struct i915_address_space *vm,
651                                      struct i915_page_directory *pd)
652 {
653           unsigned int i;
654 
655           fill_px(vm, pd,
656                     gen8_pde_encode(px_dma(vm->scratch_pt), I915_CACHE_LLC));
657           for (i = 0; i < I915_PDES; i++)
658                     pd->page_table[i] = vm->scratch_pt;
659 }
660 
__pdp_init(struct i915_address_space * vm,struct i915_page_directory_pointer * pdp)661 static int __pdp_init(struct i915_address_space *vm,
662                           struct i915_page_directory_pointer *pdp)
663 {
664           const unsigned int pdpes = i915_pdpes_per_pdp(vm);
665           unsigned int i;
666 
667           pdp->page_directory = kmalloc_array(pdpes, sizeof(*pdp->page_directory),
668                                                       GFP_KERNEL | __GFP_NOWARN);
669           if (unlikely(!pdp->page_directory))
670                     return -ENOMEM;
671 
672           for (i = 0; i < pdpes; i++)
673                     pdp->page_directory[i] = vm->scratch_pd;
674 
675           return 0;
676 }
677 
__pdp_fini(struct i915_page_directory_pointer * pdp)678 static void __pdp_fini(struct i915_page_directory_pointer *pdp)
679 {
680           kfree(pdp->page_directory);
681           pdp->page_directory = NULL;
682 }
683 
use_4lvl(const struct i915_address_space * vm)684 static inline bool use_4lvl(const struct i915_address_space *vm)
685 {
686           return i915_vm_is_48bit(vm);
687 }
688 
689 static struct i915_page_directory_pointer *
alloc_pdp(struct i915_address_space * vm)690 alloc_pdp(struct i915_address_space *vm)
691 {
692           struct i915_page_directory_pointer *pdp;
693           int ret = -ENOMEM;
694 
695           WARN_ON(!use_4lvl(vm));
696 
697           pdp = kzalloc(sizeof(*pdp), GFP_KERNEL);
698           if (!pdp)
699                     return ERR_PTR(-ENOMEM);
700 
701           ret = __pdp_init(vm, pdp);
702           if (ret)
703                     goto fail_bitmap;
704 
705           ret = setup_px(vm, pdp);
706           if (ret)
707                     goto fail_page_m;
708 
709           return pdp;
710 
711 fail_page_m:
712           __pdp_fini(pdp);
713 fail_bitmap:
714           kfree(pdp);
715 
716           return ERR_PTR(ret);
717 }
718 
/*
 * Tear down @pdp. The pointer array is always freed; the backing DMA page
 * and the pdp structure itself are released only for 4-level VMs.
 */
static void free_pdp(struct i915_address_space *vm,
		     struct i915_page_directory_pointer *pdp)
{
	__pdp_fini(pdp);

	if (use_4lvl(vm)) {
		cleanup_px(vm, pdp);
		kfree(pdp);
	}
}
730 
gen8_initialize_pdp(struct i915_address_space * vm,struct i915_page_directory_pointer * pdp)731 static void gen8_initialize_pdp(struct i915_address_space *vm,
732                                         struct i915_page_directory_pointer *pdp)
733 {
734           gen8_ppgtt_pdpe_t scratch_pdpe;
735 
736           scratch_pdpe = gen8_pdpe_encode(px_dma(vm->scratch_pd), I915_CACHE_LLC);
737 
738           fill_px(vm, pdp, scratch_pdpe);
739 }
740 
gen8_initialize_pml4(struct i915_address_space * vm,struct i915_pml4 * pml4)741 static void gen8_initialize_pml4(struct i915_address_space *vm,
742                                          struct i915_pml4 *pml4)
743 {
744           unsigned int i;
745 
746           fill_px(vm, pml4,
747                     gen8_pml4e_encode(px_dma(vm->scratch_pdp), I915_CACHE_LLC));
748           for (i = 0; i < GEN8_PML4ES_PER_PML4; i++)
749                     pml4->pdps[i] = vm->scratch_pdp;
750 }
751 
752 /* Broadwell Page Directory Pointer Descriptors */
gen8_write_pdp(struct drm_i915_gem_request * req,unsigned entry,dma_addr_t addr)753 static int gen8_write_pdp(struct drm_i915_gem_request *req,
754                                 unsigned entry,
755                                 dma_addr_t addr)
756 {
757           struct intel_engine_cs *engine = req->engine;
758           u32 *cs;
759 
760           BUG_ON(entry >= 4);
761 
762           cs = intel_ring_begin(req, 6);
763           if (IS_ERR(cs))
764                     return PTR_ERR(cs);
765 
766           *cs++ = MI_LOAD_REGISTER_IMM(1);
767           *cs++ = i915_mmio_reg_offset(GEN8_RING_PDP_UDW(engine, entry));
768           *cs++ = upper_32_bits(addr);
769           *cs++ = MI_LOAD_REGISTER_IMM(1);
770           *cs++ = i915_mmio_reg_offset(GEN8_RING_PDP_LDW(engine, entry));
771           *cs++ = lower_32_bits(addr);
772           intel_ring_advance(req, cs);
773 
774           return 0;
775 }
776 
gen8_mm_switch_3lvl(struct i915_hw_ppgtt * ppgtt,struct drm_i915_gem_request * req)777 static int gen8_mm_switch_3lvl(struct i915_hw_ppgtt *ppgtt,
778                                      struct drm_i915_gem_request *req)
779 {
780           int i, ret;
781 
782           for (i = GEN8_3LVL_PDPES - 1; i >= 0; i--) {
783                     const dma_addr_t pd_daddr = i915_page_dir_dma_addr(ppgtt, i);
784 
785                     ret = gen8_write_pdp(req, i, pd_daddr);
786                     if (ret)
787                               return ret;
788           }
789 
790           return 0;
791 }
792 
gen8_mm_switch_4lvl(struct i915_hw_ppgtt * ppgtt,struct drm_i915_gem_request * req)793 static int gen8_mm_switch_4lvl(struct i915_hw_ppgtt *ppgtt,
794                                      struct drm_i915_gem_request *req)
795 {
796           return gen8_write_pdp(req, 0, px_dma(&ppgtt->pml4));
797 }
798 
799 /* PDE TLBs are a pain to invalidate on GEN8+. When we modify
800  * the page table structures, we mark them dirty so that
801  * context switching/execlist queuing code takes extra steps
802  * to ensure that tlbs are flushed.
803  */
mark_tlbs_dirty(struct i915_hw_ppgtt * ppgtt)804 static void mark_tlbs_dirty(struct i915_hw_ppgtt *ppgtt)
805 {
806           ppgtt->pd_dirty_rings = INTEL_INFO(ppgtt->base.i915)->ring_mask;
807 }
808 
809 /* Removes entries from a single page table, releasing it if it's empty.
810  * Caller can use the return value to update higher-level entries.
811  */
gen8_ppgtt_clear_pt(struct i915_address_space * vm,struct i915_page_table * pt,u64 start,u64 length)812 static bool gen8_ppgtt_clear_pt(struct i915_address_space *vm,
813                                         struct i915_page_table *pt,
814                                         u64 start, u64 length)
815 {
816           unsigned int num_entries = gen8_pte_count(start, length);
817           unsigned int pte = gen8_pte_index(start);
818           unsigned int pte_end = pte + num_entries;
819           const gen8_pte_t scratch_pte =
820                     gen8_pte_encode(vm->scratch_page.daddr, I915_CACHE_LLC);
821           gen8_pte_t *vaddr;
822 
823           GEM_BUG_ON(num_entries > pt->used_ptes);
824 
825           pt->used_ptes -= num_entries;
826           if (!pt->used_ptes)
827                     return true;
828 
829           vaddr = kmap_atomic_px(pt);
830           while (pte < pte_end)
831                     vaddr[pte++] = scratch_pte;
832           kunmap_atomic(vaddr);
833 
834           return false;
835 }
836 
gen8_ppgtt_set_pde(struct i915_address_space * vm,struct i915_page_directory * pd,struct i915_page_table * pt,unsigned int pde)837 static void gen8_ppgtt_set_pde(struct i915_address_space *vm,
838                                      struct i915_page_directory *pd,
839                                      struct i915_page_table *pt,
840                                      unsigned int pde)
841 {
842           gen8_pde_t *vaddr;
843 
844           pd->page_table[pde] = pt;
845 
846           vaddr = kmap_atomic_px(pd);
847           vaddr[pde] = gen8_pde_encode(px_dma(pt), I915_CACHE_LLC);
848           kunmap_atomic(vaddr);
849 }
850 
gen8_ppgtt_clear_pd(struct i915_address_space * vm,struct i915_page_directory * pd,u64 start,u64 length)851 static bool gen8_ppgtt_clear_pd(struct i915_address_space *vm,
852                                         struct i915_page_directory *pd,
853                                         u64 start, u64 length)
854 {
855           struct i915_page_table *pt;
856           u32 pde;
857 
858           gen8_for_each_pde(pt, pd, start, length, pde) {
859                     GEM_BUG_ON(pt == vm->scratch_pt);
860 
861                     if (!gen8_ppgtt_clear_pt(vm, pt, start, length))
862                               continue;
863 
864                     gen8_ppgtt_set_pde(vm, pd, vm->scratch_pt, pde);
865                     GEM_BUG_ON(!pd->used_pdes);
866                     pd->used_pdes--;
867 
868                     free_pt(vm, pt);
869           }
870 
871           return !pd->used_pdes;
872 }
873 
gen8_ppgtt_set_pdpe(struct i915_address_space * vm,struct i915_page_directory_pointer * pdp,struct i915_page_directory * pd,unsigned int pdpe)874 static void gen8_ppgtt_set_pdpe(struct i915_address_space *vm,
875                                         struct i915_page_directory_pointer *pdp,
876                                         struct i915_page_directory *pd,
877                                         unsigned int pdpe)
878 {
879           gen8_ppgtt_pdpe_t *vaddr;
880 
881           pdp->page_directory[pdpe] = pd;
882           if (!use_4lvl(vm))
883                     return;
884 
885           vaddr = kmap_atomic_px(pdp);
886           vaddr[pdpe] = gen8_pdpe_encode(px_dma(pd), I915_CACHE_LLC);
887           kunmap_atomic(vaddr);
888 }
889 
890 /* Removes entries from a single page dir pointer, releasing it if it's empty.
891  * Caller can use the return value to update higher-level entries
892  */
gen8_ppgtt_clear_pdp(struct i915_address_space * vm,struct i915_page_directory_pointer * pdp,u64 start,u64 length)893 static bool gen8_ppgtt_clear_pdp(struct i915_address_space *vm,
894                                          struct i915_page_directory_pointer *pdp,
895                                          u64 start, u64 length)
896 {
897           struct i915_page_directory *pd;
898           unsigned int pdpe;
899 
900           gen8_for_each_pdpe(pd, pdp, start, length, pdpe) {
901                     GEM_BUG_ON(pd == vm->scratch_pd);
902 
903                     if (!gen8_ppgtt_clear_pd(vm, pd, start, length))
904                               continue;
905 
906                     gen8_ppgtt_set_pdpe(vm, pdp, vm->scratch_pd, pdpe);
907                     GEM_BUG_ON(!pdp->used_pdpes);
908                     pdp->used_pdpes--;
909 
910                     free_pd(vm, pd);
911           }
912 
913           return !pdp->used_pdpes;
914 }
915 
/*
 * Clear [start, start + length) in a 3-level address space by clearing the
 * ppgtt's embedded page directory pointer. The "now empty" result of
 * gen8_ppgtt_clear_pdp() is deliberately not acted upon here.
 */
static void gen8_ppgtt_clear_3lvl(struct i915_address_space *vm,
				  u64 start, u64 length)
{
	struct i915_hw_ppgtt *ppgtt = i915_vm_to_ppgtt(vm);

	gen8_ppgtt_clear_pdp(vm, &ppgtt->pdp, start, length);
}
921 
gen8_ppgtt_set_pml4e(struct i915_pml4 * pml4,struct i915_page_directory_pointer * pdp,unsigned int pml4e)922 static void gen8_ppgtt_set_pml4e(struct i915_pml4 *pml4,
923                                          struct i915_page_directory_pointer *pdp,
924                                          unsigned int pml4e)
925 {
926           gen8_ppgtt_pml4e_t *vaddr;
927 
928           pml4->pdps[pml4e] = pdp;
929 
930           vaddr = kmap_atomic_px(pml4);
931           vaddr[pml4e] = gen8_pml4e_encode(px_dma(pdp), I915_CACHE_LLC);
932           kunmap_atomic(vaddr);
933 }
934 
935 /* Removes entries from a single pml4.
936  * This is the top-level structure in 4-level page tables used on gen8+.
937  * Empty entries are always scratch pml4e.
938  */
gen8_ppgtt_clear_4lvl(struct i915_address_space * vm,u64 start,u64 length)939 static void gen8_ppgtt_clear_4lvl(struct i915_address_space *vm,
940                                           u64 start, u64 length)
941 {
942           struct i915_hw_ppgtt *ppgtt = i915_vm_to_ppgtt(vm);
943           struct i915_pml4 *pml4 = &ppgtt->pml4;
944           struct i915_page_directory_pointer *pdp;
945           unsigned int pml4e;
946 
947           GEM_BUG_ON(!use_4lvl(vm));
948 
949           gen8_for_each_pml4e(pdp, pml4, start, length, pml4e) {
950                     GEM_BUG_ON(pdp == vm->scratch_pdp);
951 
952                     if (!gen8_ppgtt_clear_pdp(vm, pdp, start, length))
953                               continue;
954 
955                     gen8_ppgtt_set_pml4e(pml4, vm->scratch_pdp, pml4e);
956 
957                     free_pdp(vm, pdp);
958           }
959 }
960 
961 static inline struct sgt_dma {
962           struct scatterlist *sg;
963           dma_addr_t dma, max;
sgt_dma(struct i915_vma * vma)964 } sgt_dma(struct i915_vma *vma) {
965           struct scatterlist *sg = vma->pages->sgl;
966           dma_addr_t addr = sg_dma_address(sg);
967           return (struct sgt_dma) { sg, addr, addr + sg->length };
968 }
969 
970 struct gen8_insert_pte {
971           u16 pml4e;
972           u16 pdpe;
973           u16 pde;
974           u16 pte;
975 };
976 
gen8_insert_pte(u64 start)977 static __always_inline struct gen8_insert_pte gen8_insert_pte(u64 start)
978 {
979           return (struct gen8_insert_pte) {
980                      gen8_pml4e_index(start),
981                      gen8_pdpe_index(start),
982                      gen8_pde_index(start),
983                      gen8_pte_index(start),
984           };
985 }
986 
987 static __always_inline bool
gen8_ppgtt_insert_pte_entries(struct i915_hw_ppgtt * ppgtt,struct i915_page_directory_pointer * pdp,struct sgt_dma * iter,struct gen8_insert_pte * idx,enum i915_cache_level cache_level)988 gen8_ppgtt_insert_pte_entries(struct i915_hw_ppgtt *ppgtt,
989                                     struct i915_page_directory_pointer *pdp,
990                                     struct sgt_dma *iter,
991                                     struct gen8_insert_pte *idx,
992                                     enum i915_cache_level cache_level)
993 {
994           struct i915_page_directory *pd;
995           const gen8_pte_t pte_encode = gen8_pte_encode(0, cache_level);
996           gen8_pte_t *vaddr;
997           bool ret;
998 
999           GEM_BUG_ON(idx->pdpe >= i915_pdpes_per_pdp(&ppgtt->base));
1000           pd = pdp->page_directory[idx->pdpe];
1001           vaddr = kmap_atomic_px(pd->page_table[idx->pde]);
1002           do {
1003                     vaddr[idx->pte] = pte_encode | iter->dma;
1004 
1005                     iter->dma += PAGE_SIZE;
1006                     if (iter->dma >= iter->max) {
1007                               iter->sg = __sg_next(iter->sg);
1008                               if (!iter->sg) {
1009                                         ret = false;
1010                                         break;
1011                               }
1012 
1013                               iter->dma = sg_dma_address(iter->sg);
1014                               iter->max = iter->dma + iter->sg->length;
1015                     }
1016 
1017                     if (++idx->pte == GEN8_PTES) {
1018                               idx->pte = 0;
1019 
1020                               if (++idx->pde == I915_PDES) {
1021                                         idx->pde = 0;
1022 
1023                                         /* Limited by sg length for 3lvl */
1024                                         if (++idx->pdpe == GEN8_PML4ES_PER_PML4) {
1025                                                   idx->pdpe = 0;
1026                                                   ret = true;
1027                                                   break;
1028                                         }
1029 
1030                                         GEM_BUG_ON(idx->pdpe >= i915_pdpes_per_pdp(&ppgtt->base));
1031                                         pd = pdp->page_directory[idx->pdpe];
1032                               }
1033 
1034                               kunmap_atomic(vaddr);
1035                               vaddr = kmap_atomic_px(pd->page_table[idx->pde]);
1036                     }
1037           } while (1);
1038           kunmap_atomic(vaddr);
1039 
1040           return ret;
1041 }
1042 
/*
 * Bind @vma into a 3-level address space: write PTEs for all of the vma's
 * pages below the ppgtt's embedded pdp, beginning at vma->node.start, and
 * record that I915_GTT_PAGE_SIZE entries were used.
 *
 * The return value of gen8_ppgtt_insert_pte_entries() is not checked.
 * @unused is ignored.
 */
static void gen8_ppgtt_insert_3lvl(struct i915_address_space *vm,
				   struct i915_vma *vma,
				   enum i915_cache_level cache_level,
				   u32 unused)
{
	struct i915_hw_ppgtt *ppgtt = i915_vm_to_ppgtt(vm);
	struct gen8_insert_pte pos = gen8_insert_pte(vma->node.start);
	struct sgt_dma cursor = sgt_dma(vma);

	gen8_ppgtt_insert_pte_entries(ppgtt, &ppgtt->pdp, &cursor, &pos,
				      cache_level);

	vma->page_sizes.gtt = I915_GTT_PAGE_SIZE;
}
1057 
gen8_ppgtt_insert_huge_entries(struct i915_vma * vma,struct i915_page_directory_pointer ** pdps,struct sgt_dma * iter,enum i915_cache_level cache_level)1058 static void gen8_ppgtt_insert_huge_entries(struct i915_vma *vma,
1059                                                      struct i915_page_directory_pointer **pdps,
1060                                                      struct sgt_dma *iter,
1061                                                      enum i915_cache_level cache_level)
1062 {
1063           const gen8_pte_t pte_encode = gen8_pte_encode(0, cache_level);
1064           u64 start = vma->node.start;
1065           dma_addr_t rem = iter->sg->length;
1066 
1067           do {
1068                     struct gen8_insert_pte idx = gen8_insert_pte(start);
1069                     struct i915_page_directory_pointer *pdp = pdps[idx.pml4e];
1070                     struct i915_page_directory *pd = pdp->page_directory[idx.pdpe];
1071                     unsigned int page_size;
1072                     bool maybe_64K = false;
1073                     gen8_pte_t encode = pte_encode;
1074                     gen8_pte_t *vaddr;
1075                     u16 index, max;
1076 
1077                     if (vma->page_sizes.sg & I915_GTT_PAGE_SIZE_2M &&
1078                         IS_ALIGNED(iter->dma, I915_GTT_PAGE_SIZE_2M) &&
1079                         rem >= I915_GTT_PAGE_SIZE_2M && !idx.pte) {
1080                               index = idx.pde;
1081                               max = I915_PDES;
1082                               page_size = I915_GTT_PAGE_SIZE_2M;
1083 
1084                               encode |= GEN8_PDE_PS_2M;
1085 
1086                               vaddr = kmap_atomic_px(pd);
1087                     } else {
1088                               struct i915_page_table *pt = pd->page_table[idx.pde];
1089 
1090                               index = idx.pte;
1091                               max = GEN8_PTES;
1092                               page_size = I915_GTT_PAGE_SIZE;
1093 
1094                               if (!index &&
1095                                   vma->page_sizes.sg & I915_GTT_PAGE_SIZE_64K &&
1096                                   IS_ALIGNED(iter->dma, I915_GTT_PAGE_SIZE_64K) &&
1097                                   (IS_ALIGNED(rem, I915_GTT_PAGE_SIZE_64K) ||
1098                                    rem >= (max - index) << PAGE_SHIFT))
1099                                         maybe_64K = true;
1100 
1101                               vaddr = kmap_atomic_px(pt);
1102                     }
1103 
1104                     do {
1105                               GEM_BUG_ON(iter->sg->length < page_size);
1106                               vaddr[index++] = encode | iter->dma;
1107 
1108                               start += page_size;
1109                               iter->dma += page_size;
1110                               rem -= page_size;
1111                               if (iter->dma >= iter->max) {
1112                                         iter->sg = __sg_next(iter->sg);
1113                                         if (!iter->sg)
1114                                                   break;
1115 
1116                                         rem = iter->sg->length;
1117                                         iter->dma = sg_dma_address(iter->sg);
1118                                         iter->max = iter->dma + rem;
1119 
1120                                         if (maybe_64K && index < max &&
1121                                             !(IS_ALIGNED(iter->dma, I915_GTT_PAGE_SIZE_64K) &&
1122                                               (IS_ALIGNED(rem, I915_GTT_PAGE_SIZE_64K) ||
1123                                                rem >= (max - index) << PAGE_SHIFT)))
1124                                                   maybe_64K = false;
1125 
1126                                         if (unlikely(!IS_ALIGNED(iter->dma, page_size)))
1127                                                   break;
1128                               }
1129                     } while (rem >= page_size && index < max);
1130 
1131                     kunmap_atomic(vaddr);
1132 
1133                     /*
1134                      * Is it safe to mark the 2M block as 64K? -- Either we have
1135                      * filled whole page-table with 64K entries, or filled part of
1136                      * it and have reached the end of the sg table and we have
1137                      * enough padding.
1138                      */
1139                     if (maybe_64K &&
1140                         (index == max ||
1141                          (i915_vm_has_scratch_64K(vma->vm) &&
1142                           !iter->sg && IS_ALIGNED(vma->node.start +
1143                                                         vma->node.size,
1144                                                         I915_GTT_PAGE_SIZE_2M)))) {
1145                               vaddr = kmap_atomic_px(pd);
1146                               vaddr[idx.pde] |= GEN8_PDE_IPS_64K;
1147                               kunmap_atomic(vaddr);
1148                               page_size = I915_GTT_PAGE_SIZE_64K;
1149                     }
1150 
1151                     vma->page_sizes.gtt |= page_size;
1152           } while (iter->sg);
1153 }
1154 
/*
 * Bind @vma into a 4-level address space.
 *
 * When the backing store offers page sizes larger than I915_GTT_PAGE_SIZE
 * the huge-entry path is taken. Otherwise plain PTEs are written one pdp
 * at a time, moving on to the next PML4 slot for as long as
 * gen8_ppgtt_insert_pte_entries() reports that more remains, and
 * vma->page_sizes.gtt is set to I915_GTT_PAGE_SIZE.
 *
 * @unused is ignored.
 */
static void gen8_ppgtt_insert_4lvl(struct i915_address_space *vm,
				   struct i915_vma *vma,
				   enum i915_cache_level cache_level,
				   u32 unused)
{
	struct i915_hw_ppgtt *ppgtt = i915_vm_to_ppgtt(vm);
	struct sgt_dma cursor = sgt_dma(vma);
	struct i915_page_directory_pointer **pdps = ppgtt->pml4.pdps;
	struct gen8_insert_pte pos;

	if (vma->page_sizes.sg > I915_GTT_PAGE_SIZE) {
		gen8_ppgtt_insert_huge_entries(vma, pdps, &cursor, cache_level);
		return;
	}

	pos = gen8_insert_pte(vma->node.start);

	for (;;) {
		/* Select the pdp first, then step to the next PML4 slot. */
		struct i915_page_directory_pointer *pdp = pdps[pos.pml4e++];

		if (!gen8_ppgtt_insert_pte_entries(ppgtt, pdp, &cursor, &pos,
						   cache_level))
			break;

		GEM_BUG_ON(pos.pml4e >= GEN8_PML4ES_PER_PML4);
	}

	vma->page_sizes.gtt = I915_GTT_PAGE_SIZE;
}
1176 
gen8_free_page_tables(struct i915_address_space * vm,struct i915_page_directory * pd)1177 static void gen8_free_page_tables(struct i915_address_space *vm,
1178                                           struct i915_page_directory *pd)
1179 {
1180           int i;
1181 
1182           if (!px_page(pd))
1183                     return;
1184 
1185           for (i = 0; i < I915_PDES; i++) {
1186                     if (pd->page_table[i] != vm->scratch_pt)
1187                               free_pt(vm, pd->page_table[i]);
1188           }
1189 }
1190 
gen8_init_scratch(struct i915_address_space * vm)1191 static int gen8_init_scratch(struct i915_address_space *vm)
1192 {
1193           int ret;
1194 
1195           ret = setup_scratch_page(vm, I915_GFP_DMA);
1196           if (ret)
1197                     return ret;
1198 
1199           vm->scratch_pt = alloc_pt(vm);
1200           if (IS_ERR(vm->scratch_pt)) {
1201                     ret = PTR_ERR(vm->scratch_pt);
1202                     goto free_scratch_page;
1203           }
1204 
1205           vm->scratch_pd = alloc_pd(vm);
1206           if (IS_ERR(vm->scratch_pd)) {
1207                     ret = PTR_ERR(vm->scratch_pd);
1208                     goto free_pt;
1209           }
1210 
1211           if (use_4lvl(vm)) {
1212                     vm->scratch_pdp = alloc_pdp(vm);
1213                     if (IS_ERR(vm->scratch_pdp)) {
1214                               ret = PTR_ERR(vm->scratch_pdp);
1215                               goto free_pd;
1216                     }
1217           }
1218 
1219           gen8_initialize_pt(vm, vm->scratch_pt);
1220           gen8_initialize_pd(vm, vm->scratch_pd);
1221           if (use_4lvl(vm))
1222                     gen8_initialize_pdp(vm, vm->scratch_pdp);
1223 
1224           return 0;
1225 
1226 free_pd:
1227           free_pd(vm, vm->scratch_pd);
1228 free_pt:
1229           free_pt(vm, vm->scratch_pt);
1230 free_scratch_page:
1231           cleanup_scratch_page(vm);
1232 
1233           return ret;
1234 }
1235 
/*
 * Tell the vGPU host about the creation (@create true) or destruction
 * (@create false) of @ppgtt.
 *
 * The top-level table addresses are written to the vgtif pdp registers -
 * the PML4 address into pdp[0] for 4-level tables, or each of the
 * GEN8_3LVL_PDPES page directory addresses for 3-level tables - followed
 * by the matching create/destroy message in g2v_notify.
 *
 * Always returns 0.
 */
static int gen8_ppgtt_notify_vgt(struct i915_hw_ppgtt *ppgtt, bool create)
{
	struct i915_address_space *vm = &ppgtt->base;
	/*
	 * NOTE(review): dev_priv has no explicit user below; presumably
	 * I915_WRITE() picks it up by name - confirm before renaming.
	 */
	struct drm_i915_private *dev_priv = vm->i915;
	enum vgt_g2v_type msg;

	if (use_4lvl(vm)) {
		const u64 pml4_daddr = px_dma(&ppgtt->pml4);

		I915_WRITE(vgtif_reg(pdp[0].lo), lower_32_bits(pml4_daddr));
		I915_WRITE(vgtif_reg(pdp[0].hi), upper_32_bits(pml4_daddr));

		if (create)
			msg = VGT_G2V_PPGTT_L4_PAGE_TABLE_CREATE;
		else
			msg = VGT_G2V_PPGTT_L4_PAGE_TABLE_DESTROY;
	} else {
		int n;

		for (n = 0; n < GEN8_3LVL_PDPES; n++) {
			const u64 pd_daddr = i915_page_dir_dma_addr(ppgtt, n);

			I915_WRITE(vgtif_reg(pdp[n].lo), lower_32_bits(pd_daddr));
			I915_WRITE(vgtif_reg(pdp[n].hi), upper_32_bits(pd_daddr));
		}

		if (create)
			msg = VGT_G2V_PPGTT_L3_PAGE_TABLE_CREATE;
		else
			msg = VGT_G2V_PPGTT_L3_PAGE_TABLE_DESTROY;
	}

	I915_WRITE(vgtif_reg(g2v_notify), msg);

	return 0;
}
1267 
gen8_free_scratch(struct i915_address_space * vm)1268 static void gen8_free_scratch(struct i915_address_space *vm)
1269 {
1270           if (use_4lvl(vm))
1271                     free_pdp(vm, vm->scratch_pdp);
1272           free_pd(vm, vm->scratch_pd);
1273           free_pt(vm, vm->scratch_pt);
1274           cleanup_scratch_page(vm);
1275 }
1276 
gen8_ppgtt_cleanup_3lvl(struct i915_address_space * vm,struct i915_page_directory_pointer * pdp)1277 static void gen8_ppgtt_cleanup_3lvl(struct i915_address_space *vm,
1278                                             struct i915_page_directory_pointer *pdp)
1279 {
1280           const unsigned int pdpes = i915_pdpes_per_pdp(vm);
1281           int i;
1282 
1283           for (i = 0; i < pdpes; i++) {
1284                     if (pdp->page_directory[i] == vm->scratch_pd)
1285                               continue;
1286 
1287                     gen8_free_page_tables(vm, pdp->page_directory[i]);
1288                     free_pd(vm, pdp->page_directory[i]);
1289           }
1290 
1291           free_pdp(vm, pdp);
1292 }
1293 
gen8_ppgtt_cleanup_4lvl(struct i915_hw_ppgtt * ppgtt)1294 static void gen8_ppgtt_cleanup_4lvl(struct i915_hw_ppgtt *ppgtt)
1295 {
1296           int i;
1297 
1298           for (i = 0; i < GEN8_PML4ES_PER_PML4; i++) {
1299                     if (ppgtt->pml4.pdps[i] == ppgtt->base.scratch_pdp)
1300                               continue;
1301 
1302                     gen8_ppgtt_cleanup_3lvl(&ppgtt->base, ppgtt->pml4.pdps[i]);
1303           }
1304 
1305           cleanup_px(&ppgtt->base, &ppgtt->pml4);
1306 }
1307 
gen8_ppgtt_cleanup(struct i915_address_space * vm)1308 static void gen8_ppgtt_cleanup(struct i915_address_space *vm)
1309 {
1310           struct drm_i915_private *dev_priv = vm->i915;
1311           struct i915_hw_ppgtt *ppgtt = i915_vm_to_ppgtt(vm);
1312 
1313           if (intel_vgpu_active(dev_priv))
1314                     gen8_ppgtt_notify_vgt(ppgtt, false);
1315 
1316           if (use_4lvl(vm))
1317                     gen8_ppgtt_cleanup_4lvl(ppgtt);
1318           else
1319                     gen8_ppgtt_cleanup_3lvl(&ppgtt->base, &ppgtt->pdp);
1320 
1321           gen8_free_scratch(vm);
1322 }
1323 
/*
 * Make sure page tables exist for [start, start + length) within @pd.
 *
 * A slot still pointing at the scratch page table gets a freshly allocated
 * table, which is initialised only when the range does not cover the whole
 * table or when running as a vGPU guest. Every table in range has the
 * number of PTEs the range covers added to its used_ptes count.
 *
 * Returns 0 on success. If an allocation fails, the part of the range that
 * was already processed is cleared again and -ENOMEM is returned.
 */
static int gen8_ppgtt_alloc_pd(struct i915_address_space *vm,
			       struct i915_page_directory *pd,
			       u64 start, u64 length)
{
	const u64 origin = start;
	struct i915_page_table *pt;
	unsigned int slot;

	gen8_for_each_pde(pt, pd, start, length, slot) {
		int count = gen8_pte_count(start, length);

		if (pt == vm->scratch_pt) {
			pt = alloc_pt(vm);
			if (IS_ERR(pt))
				goto err_unwind;

			/* A fully covered table is not pre-initialised. */
			if (count < GEN8_PTES || intel_vgpu_active(vm->i915))
				gen8_initialize_pt(vm, pt);

			gen8_ppgtt_set_pde(vm, pd, pt, slot);
			pd->used_pdes++;
			GEM_BUG_ON(pd->used_pdes > I915_PDES);
		}

		pt->used_ptes += count;
	}

	return 0;

err_unwind:
	/* start has advanced to the failing slot; undo [origin, start). */
	gen8_ppgtt_clear_pd(vm, pd, origin, start - origin);
	return -ENOMEM;
}
1356 
/*
 * Make sure page directories and page tables exist for
 * [start, start + length) within @pdp.
 *
 * A slot still pointing at the scratch page directory gets a freshly
 * allocated, initialised directory and the TLBs are marked dirty. The
 * page tables of every directory in range are then populated through
 * gen8_ppgtt_alloc_pd().
 *
 * Returns 0 on success. On failure the directory being worked on is
 * released if it holds no page tables, the part of the range that was
 * already processed is cleared again, and -ENOMEM is returned.
 */
static int gen8_ppgtt_alloc_pdp(struct i915_address_space *vm,
				struct i915_page_directory_pointer *pdp,
				u64 start, u64 length)
{
	const u64 origin = start;
	struct i915_page_directory *pd;
	unsigned int slot;

	gen8_for_each_pdpe(pd, pdp, start, length, slot) {
		if (pd == vm->scratch_pd) {
			pd = alloc_pd(vm);
			if (IS_ERR(pd))
				goto err_unwind;

			gen8_initialize_pd(vm, pd);
			gen8_ppgtt_set_pdpe(vm, pdp, pd, slot);
			pdp->used_pdpes++;
			GEM_BUG_ON(pdp->used_pdpes > i915_pdpes_per_pdp(vm));

			mark_tlbs_dirty(i915_vm_to_ppgtt(vm));
		}

		if (unlikely(gen8_ppgtt_alloc_pd(vm, pd, start, length)))
			goto err_unwind_pd;
	}

	return 0;

err_unwind_pd:
	/* Drop the current directory again if nothing ended up in it. */
	if (pd->used_pdes == 0) {
		gen8_ppgtt_set_pdpe(vm, pdp, vm->scratch_pd, slot);
		GEM_BUG_ON(!pdp->used_pdpes);
		pdp->used_pdpes--;
		free_pd(vm, pd);
	}
err_unwind:
	/* start has advanced to the failing slot; undo [origin, start). */
	gen8_ppgtt_clear_pdp(vm, pdp, origin, start - origin);
	return -ENOMEM;
}
1398 
/*
 * Allocate the page-table structures backing [start, start + length) in a
 * 3-level address space, using the ppgtt's embedded pdp.
 *
 * Returns the result of gen8_ppgtt_alloc_pdp().
 */
static int gen8_ppgtt_alloc_3lvl(struct i915_address_space *vm,
				 u64 start, u64 length)
{
	struct i915_hw_ppgtt *ppgtt = i915_vm_to_ppgtt(vm);

	return gen8_ppgtt_alloc_pdp(vm, &ppgtt->pdp, start, length);
}
1405 
/*
 * Allocate the page-table structures backing [start, start + length) in a
 * 4-level address space.
 *
 * A PML4 slot still pointing at the scratch pdp gets a freshly allocated,
 * initialised pdp. Every pdp in range is then populated through
 * gen8_ppgtt_alloc_pdp().
 *
 * Returns 0 on success. On failure the pdp being worked on is released if
 * it holds no page directories, the part of the range that was already
 * processed is cleared again, and -ENOMEM is returned.
 */
static int gen8_ppgtt_alloc_4lvl(struct i915_address_space *vm,
				 u64 start, u64 length)
{
	struct i915_pml4 *pml4 = &i915_vm_to_ppgtt(vm)->pml4;
	struct i915_page_directory_pointer *pdp;
	const u64 origin = start;
	u32 slot;

	gen8_for_each_pml4e(pdp, pml4, start, length, slot) {
		if (pml4->pdps[slot] == vm->scratch_pdp) {
			pdp = alloc_pdp(vm);
			if (IS_ERR(pdp))
				goto err_unwind;

			gen8_initialize_pdp(vm, pdp);
			gen8_ppgtt_set_pml4e(pml4, pdp, slot);
		}

		if (unlikely(gen8_ppgtt_alloc_pdp(vm, pdp, start, length)))
			goto err_unwind_pdp;
	}

	return 0;

err_unwind_pdp:
	/* Drop the current pdp again if nothing ended up in it. */
	if (pdp->used_pdpes == 0) {
		gen8_ppgtt_set_pml4e(pml4, vm->scratch_pdp, slot);
		free_pdp(vm, pdp);
	}
err_unwind:
	/* start has advanced to the failing slot; undo [origin, start). */
	gen8_ppgtt_clear_4lvl(vm, origin, start - origin);
	return -ENOMEM;
}
1442 
/*
 * Dump the populated PTEs below @pdp, within [start, start + length), to
 * the seq_file @m.
 *
 * Page directories and page tables that are still the scratch ones are
 * skipped. The remaining page tables are walked in groups of four PTEs; a
 * group is printed only when at least one of its PTEs differs from
 * @scratch_pte, with scratch entries inside a printed group shown as
 * "SCRATCH".
 *
 * The address printed for each group is assembled from the pdpe, pde and
 * pte indices only, i.e. it is relative to @pdp and carries no PML4
 * contribution.
 */
static void gen8_dump_pdp(struct i915_hw_ppgtt *ppgtt,
			  struct i915_page_directory_pointer *pdp,
			  u64 start, u64 length,
			  gen8_pte_t scratch_pte,
			  struct seq_file *m)
{
	/*
	 * NOTE(review): vm has no explicit user below; presumably
	 * gen8_for_each_pdpe() picks it up by name - confirm before removing.
	 */
	struct i915_address_space *vm = &ppgtt->base;
	struct i915_page_directory *pd;
	u32 pdpe;

	gen8_for_each_pdpe(pd, pdp, start, length, pdpe) {
		struct i915_page_table *pt;
		u64 pd_len = length;
		u64 pd_start = start;
		u32 pde;

		if (pdp->page_directory[pdpe] == ppgtt->base.scratch_pd)
			continue;

		seq_printf(m, "\tPDPE #%d\n", pdpe);
		gen8_for_each_pde(pt, pd, pd_start, pd_len, pde) {
			u32 pte;
			gen8_pte_t *pt_vaddr;

			if (pd->page_table[pde] == ppgtt->base.scratch_pt)
				continue;

			pt_vaddr = kmap_atomic_px(pt);
			for (pte = 0; pte < GEN8_PTES; pte += 4) {
				/*
				 * Widen before shifting: the indices are u32,
				 * so a 32-bit shift would truncate the pdpe
				 * term once pdpe reaches 4.
				 */
				u64 va = ((u64)pdpe << GEN8_PDPE_SHIFT |
					  (u64)pde << GEN8_PDE_SHIFT |
					  (u64)pte << GEN8_PTE_SHIFT);
				int i;
				bool found = false;

				for (i = 0; i < 4; i++)
					if (pt_vaddr[pte + i] != scratch_pte)
						found = true;
				if (!found)
					continue;

				seq_printf(m, "\t\t0x%llx [%03d,%03d,%04d]: =", va, pdpe, pde, pte);
				for (i = 0; i < 4; i++) {
					if (pt_vaddr[pte + i] != scratch_pte)
						seq_printf(m, " %llx", pt_vaddr[pte + i]);
					else
						seq_puts(m, "  SCRATCH ");
				}
				seq_puts(m, "\n");
			}
			kunmap_atomic(pt_vaddr);
		}
	}
}
1497 
gen8_dump_ppgtt(struct i915_hw_ppgtt * ppgtt,struct seq_file * m)1498 static void gen8_dump_ppgtt(struct i915_hw_ppgtt *ppgtt, struct seq_file *m)
1499 {
1500           struct i915_address_space *vm = &ppgtt->base;
1501           const gen8_pte_t scratch_pte =
1502                     gen8_pte_encode(vm->scratch_page.daddr, I915_CACHE_LLC);
1503           u64 start = 0, length = ppgtt->base.total;
1504 
1505           if (use_4lvl(vm)) {
1506                     u64 pml4e;
1507                     struct i915_pml4 *pml4 = &ppgtt->pml4;
1508                     struct i915_page_directory_pointer *pdp;
1509 
1510                     gen8_for_each_pml4e(pdp, pml4, start, length, pml4e) {
1511                               if (pml4->pdps[pml4e] == ppgtt->base.scratch_pdp)
1512                                         continue;
1513 
1514                               seq_printf(m, "    PML4E #%llu\n", pml4e);
1515                               gen8_dump_pdp(ppgtt, pdp, start, length, scratch_pte, m);
1516                     }
1517           } else {
1518                     gen8_dump_pdp(ppgtt, &ppgtt->pdp, start, length, scratch_pte, m);
1519           }
1520 }
1521 
gen8_preallocate_top_level_pdp(struct i915_hw_ppgtt * ppgtt)1522 static int gen8_preallocate_top_level_pdp(struct i915_hw_ppgtt *ppgtt)
1523 {
1524           struct i915_address_space *vm = &ppgtt->base;
1525           struct i915_page_directory_pointer *pdp = &ppgtt->pdp;
1526           struct i915_page_directory *pd;
1527           u64 start = 0, length = ppgtt->base.total;
1528           u64 from = start;
1529           unsigned int pdpe;
1530 
1531           gen8_for_each_pdpe(pd, pdp, start, length, pdpe) {
1532                     pd = alloc_pd(vm);
1533                     if (IS_ERR(pd))
1534                               goto unwind;
1535 
1536                     gen8_initialize_pd(vm, pd);
1537                     gen8_ppgtt_set_pdpe(vm, pdp, pd, pdpe);
1538                     pdp->used_pdpes++;
1539           }
1540 
1541           pdp->used_pdpes++; /* never remove */
1542           return 0;
1543 
1544 unwind:
1545           start -= from;
1546           gen8_for_each_pdpe(pd, pdp, from, start, pdpe) {
1547                     gen8_ppgtt_set_pdpe(vm, pdp, vm->scratch_pd, pdpe);
1548                     free_pd(vm, pd);
1549           }
1550           pdp->used_pdpes = 0;
1551           return -ENOMEM;
1552 }
1553 
1554 /*
1555  * GEN8 legacy ppgtt programming is accomplished through a max 4 PDP registers
1556  * with a net effect resembling a 2-level page table in normal x86 terms. Each
1557  * PDP represents 1GB of memory 4 * 512 * 512 * 4096 = 4GB legacy 32b address
1558  * space.
1559  *
1560  */
gen8_ppgtt_init(struct i915_hw_ppgtt * ppgtt)1561 static int gen8_ppgtt_init(struct i915_hw_ppgtt *ppgtt)
1562 {
1563           struct i915_address_space *vm = &ppgtt->base;
1564           struct drm_i915_private *dev_priv = vm->i915;
1565           int ret;
1566 
1567           ppgtt->base.total = USES_FULL_48BIT_PPGTT(dev_priv) ?
1568                     1ULL << 48 :
1569                     1ULL << 32;
1570 
1571           /* There are only few exceptions for gen >=6. chv and bxt.
1572            * And we are not sure about the latter so play safe for now.
1573            */
1574           if (IS_CHERRYVIEW(dev_priv) || IS_BROXTON(dev_priv))
1575                     ppgtt->base.pt_kmap_wc = true;
1576 
1577           ret = gen8_init_scratch(&ppgtt->base);
1578           if (ret) {
1579                     ppgtt->base.total = 0;
1580                     return ret;
1581           }
1582 
1583           if (use_4lvl(vm)) {
1584                     ret = setup_px(&ppgtt->base, &ppgtt->pml4);
1585                     if (ret)
1586                               goto free_scratch;
1587 
1588                     gen8_initialize_pml4(&ppgtt->base, &ppgtt->pml4);
1589 
1590                     ppgtt->switch_mm = gen8_mm_switch_4lvl;
1591                     ppgtt->base.allocate_va_range = gen8_ppgtt_alloc_4lvl;
1592                     ppgtt->base.insert_entries = gen8_ppgtt_insert_4lvl;
1593                     ppgtt->base.clear_range = gen8_ppgtt_clear_4lvl;
1594           } else {
1595                     ret = __pdp_init(&ppgtt->base, &ppgtt->pdp);
1596                     if (ret)
1597                               goto free_scratch;
1598 
1599                     if (intel_vgpu_active(dev_priv)) {
1600                               ret = gen8_preallocate_top_level_pdp(ppgtt);
1601                               if (ret) {
1602                                         __pdp_fini(&ppgtt->pdp);
1603                                         goto free_scratch;
1604                               }
1605                     }
1606 
1607                     ppgtt->switch_mm = gen8_mm_switch_3lvl;
1608                     ppgtt->base.allocate_va_range = gen8_ppgtt_alloc_3lvl;
1609                     ppgtt->base.insert_entries = gen8_ppgtt_insert_3lvl;
1610                     ppgtt->base.clear_range = gen8_ppgtt_clear_3lvl;
1611           }
1612 
1613           if (intel_vgpu_active(dev_priv))
1614                     gen8_ppgtt_notify_vgt(ppgtt, true);
1615 
1616           ppgtt->base.cleanup = gen8_ppgtt_cleanup;
1617           ppgtt->base.unbind_vma = ppgtt_unbind_vma;
1618           ppgtt->base.bind_vma = ppgtt_bind_vma;
1619           ppgtt->base.set_pages = ppgtt_set_pages;
1620           ppgtt->base.clear_pages = clear_pages;
1621           ppgtt->debug_dump = gen8_dump_ppgtt;
1622 
1623           return 0;
1624 
1625 free_scratch:
1626           gen8_free_scratch(&ppgtt->base);
1627           return ret;
1628 }
1629 
gen6_dump_ppgtt(struct i915_hw_ppgtt * ppgtt,struct seq_file * m)1630 static void gen6_dump_ppgtt(struct i915_hw_ppgtt *ppgtt, struct seq_file *m)
1631 {
1632           struct i915_address_space *vm = &ppgtt->base;
1633           struct i915_page_table *unused;
1634           gen6_pte_t scratch_pte;
1635           u32 pd_entry, pte, pde;
1636           u32 start = 0, length = ppgtt->base.total;
1637 
1638           scratch_pte = vm->pte_encode(vm->scratch_page.daddr,
1639                                              I915_CACHE_LLC, 0);
1640 
1641           gen6_for_each_pde(unused, &ppgtt->pd, start, length, pde) {
1642                     u32 expected;
1643                     gen6_pte_t *pt_vaddr;
1644                     const dma_addr_t pt_addr = px_dma(ppgtt->pd.page_table[pde]);
1645                     pd_entry = readl(ppgtt->pd_addr + pde);
1646                     expected = (GEN6_PDE_ADDR_ENCODE(pt_addr) | GEN6_PDE_VALID);
1647 
1648                     if (pd_entry != expected)
1649                               seq_printf(m, "\tPDE #%d mismatch: Actual PDE: %x Expected PDE: %x\n",
1650                                            pde,
1651                                            pd_entry,
1652                                            expected);
1653                     seq_printf(m, "\tPDE: %x\n", pd_entry);
1654 
1655                     pt_vaddr = kmap_atomic_px(ppgtt->pd.page_table[pde]);
1656 
1657                     for (pte = 0; pte < GEN6_PTES; pte+=4) {
1658                               unsigned long va =
1659                                         (pde * PAGE_SIZE * GEN6_PTES) +
1660                                         (pte * PAGE_SIZE);
1661                               int i;
1662                               bool found = false;
1663                               for (i = 0; i < 4; i++)
1664                                         if (pt_vaddr[pte + i] != scratch_pte)
1665                                                   found = true;
1666                               if (!found)
1667                                         continue;
1668 
1669                               seq_printf(m, "\t\t0x%lx [%03d,%04d]: =", va, pde, pte);
1670                               for (i = 0; i < 4; i++) {
1671                                         if (pt_vaddr[pte + i] != scratch_pte)
1672                                                   seq_printf(m, " %08x", pt_vaddr[pte + i]);
1673                                         else
1674                                                   seq_puts(m, "  SCRATCH ");
1675                               }
1676                               seq_puts(m, "\n");
1677                     }
1678                     kunmap_atomic(pt_vaddr);
1679           }
1680 }
1681 
1682 /* Write pde (index) from the page directory @pd to the page table @pt */
gen6_write_pde(const struct i915_hw_ppgtt * ppgtt,const unsigned int pde,const struct i915_page_table * pt)1683 static inline void gen6_write_pde(const struct i915_hw_ppgtt *ppgtt,
1684                                           const unsigned int pde,
1685                                           const struct i915_page_table *pt)
1686 {
1687           /* Caller needs to make sure the write completes if necessary */
1688           writel_relaxed(GEN6_PDE_ADDR_ENCODE(px_dma(pt)) | GEN6_PDE_VALID,
1689                            ppgtt->pd_addr + pde);
1690 }
1691 
1692 /* Write all the page tables found in the ppgtt structure to incrementing page
1693  * directories. */
gen6_write_page_range(struct i915_hw_ppgtt * ppgtt,u32 start,u32 length)1694 static void gen6_write_page_range(struct i915_hw_ppgtt *ppgtt,
1695                                           u32 start, u32 length)
1696 {
1697           struct i915_page_table *pt;
1698           unsigned int pde;
1699 
1700           gen6_for_each_pde(pt, &ppgtt->pd, start, length, pde)
1701                     gen6_write_pde(ppgtt, pde, pt);
1702 
1703           mark_tlbs_dirty(ppgtt);
1704           wmb();
1705 }
1706 
get_pd_offset(struct i915_hw_ppgtt * ppgtt)1707 static inline u32 get_pd_offset(struct i915_hw_ppgtt *ppgtt)
1708 {
1709           GEM_BUG_ON(ppgtt->pd.base.ggtt_offset & 0x3f);
1710           return ppgtt->pd.base.ggtt_offset << 10;
1711 }
1712 
hsw_mm_switch(struct i915_hw_ppgtt * ppgtt,struct drm_i915_gem_request * req)1713 static int hsw_mm_switch(struct i915_hw_ppgtt *ppgtt,
1714                                struct drm_i915_gem_request *req)
1715 {
1716           struct intel_engine_cs *engine = req->engine;
1717           u32 *cs;
1718 
1719           /* NB: TLBs must be flushed and invalidated before a switch */
1720           cs = intel_ring_begin(req, 6);
1721           if (IS_ERR(cs))
1722                     return PTR_ERR(cs);
1723 
1724           *cs++ = MI_LOAD_REGISTER_IMM(2);
1725           *cs++ = i915_mmio_reg_offset(RING_PP_DIR_DCLV(engine));
1726           *cs++ = PP_DIR_DCLV_2G;
1727           *cs++ = i915_mmio_reg_offset(RING_PP_DIR_BASE(engine));
1728           *cs++ = get_pd_offset(ppgtt);
1729           *cs++ = MI_NOOP;
1730           intel_ring_advance(req, cs);
1731 
1732           return 0;
1733 }
1734 
gen7_mm_switch(struct i915_hw_ppgtt * ppgtt,struct drm_i915_gem_request * req)1735 static int gen7_mm_switch(struct i915_hw_ppgtt *ppgtt,
1736                                 struct drm_i915_gem_request *req)
1737 {
1738           struct intel_engine_cs *engine = req->engine;
1739           u32 *cs;
1740 
1741           /* NB: TLBs must be flushed and invalidated before a switch */
1742           cs = intel_ring_begin(req, 6);
1743           if (IS_ERR(cs))
1744                     return PTR_ERR(cs);
1745 
1746           *cs++ = MI_LOAD_REGISTER_IMM(2);
1747           *cs++ = i915_mmio_reg_offset(RING_PP_DIR_DCLV(engine));
1748           *cs++ = PP_DIR_DCLV_2G;
1749           *cs++ = i915_mmio_reg_offset(RING_PP_DIR_BASE(engine));
1750           *cs++ = get_pd_offset(ppgtt);
1751           *cs++ = MI_NOOP;
1752           intel_ring_advance(req, cs);
1753 
1754           return 0;
1755 }
1756 
gen6_mm_switch(struct i915_hw_ppgtt * ppgtt,struct drm_i915_gem_request * req)1757 static int gen6_mm_switch(struct i915_hw_ppgtt *ppgtt,
1758                                 struct drm_i915_gem_request *req)
1759 {
1760           struct intel_engine_cs *engine = req->engine;
1761           struct drm_i915_private *dev_priv = req->i915;
1762 
1763           I915_WRITE(RING_PP_DIR_DCLV(engine), PP_DIR_DCLV_2G);
1764           I915_WRITE(RING_PP_DIR_BASE(engine), get_pd_offset(ppgtt));
1765           return 0;
1766 }
1767 
gen8_ppgtt_enable(struct drm_i915_private * dev_priv)1768 static void gen8_ppgtt_enable(struct drm_i915_private *dev_priv)
1769 {
1770           struct intel_engine_cs *engine;
1771           enum intel_engine_id id;
1772 
1773           for_each_engine(engine, dev_priv, id) {
1774                     u32 four_level = USES_FULL_48BIT_PPGTT(dev_priv) ?
1775                                          GEN8_GFX_PPGTT_48B : 0;
1776                     I915_WRITE(RING_MODE_GEN7(engine),
1777                                  _MASKED_BIT_ENABLE(GFX_PPGTT_ENABLE | four_level));
1778           }
1779 }
1780 
gen7_ppgtt_enable(struct drm_i915_private * dev_priv)1781 static void gen7_ppgtt_enable(struct drm_i915_private *dev_priv)
1782 {
1783           struct intel_engine_cs *engine;
1784           u32 ecochk, ecobits;
1785           enum intel_engine_id id;
1786 
1787           ecobits = I915_READ(GAC_ECO_BITS);
1788           I915_WRITE(GAC_ECO_BITS, ecobits | ECOBITS_PPGTT_CACHE64B);
1789 
1790           ecochk = I915_READ(GAM_ECOCHK);
1791           if (IS_HASWELL(dev_priv)) {
1792                     ecochk |= ECOCHK_PPGTT_WB_HSW;
1793           } else {
1794                     ecochk |= ECOCHK_PPGTT_LLC_IVB;
1795                     ecochk &= ~ECOCHK_PPGTT_GFDT_IVB;
1796           }
1797           I915_WRITE(GAM_ECOCHK, ecochk);
1798 
1799           for_each_engine(engine, dev_priv, id) {
1800                     /* GFX_MODE is per-ring on gen7+ */
1801                     I915_WRITE(RING_MODE_GEN7(engine),
1802                                  _MASKED_BIT_ENABLE(GFX_PPGTT_ENABLE));
1803           }
1804 }
1805 
gen6_ppgtt_enable(struct drm_i915_private * dev_priv)1806 static void gen6_ppgtt_enable(struct drm_i915_private *dev_priv)
1807 {
1808           u32 ecochk, gab_ctl, ecobits;
1809 
1810           ecobits = I915_READ(GAC_ECO_BITS);
1811           I915_WRITE(GAC_ECO_BITS, ecobits | ECOBITS_SNB_BIT |
1812                        ECOBITS_PPGTT_CACHE64B);
1813 
1814           gab_ctl = I915_READ(GAB_CTL);
1815           I915_WRITE(GAB_CTL, gab_ctl | GAB_CTL_CONT_AFTER_PAGEFAULT);
1816 
1817           ecochk = I915_READ(GAM_ECOCHK);
1818           I915_WRITE(GAM_ECOCHK, ecochk | ECOCHK_SNB_BIT | ECOCHK_PPGTT_CACHE64B);
1819 
1820           I915_WRITE(GFX_MODE, _MASKED_BIT_ENABLE(GFX_PPGTT_ENABLE));
1821 }
1822 
1823 /* PPGTT support for Sandybdrige/Gen6 and later */
gen6_ppgtt_clear_range(struct i915_address_space * vm,u64 start,u64 length)1824 static void gen6_ppgtt_clear_range(struct i915_address_space *vm,
1825                                            u64 start, u64 length)
1826 {
1827           struct i915_hw_ppgtt *ppgtt = i915_vm_to_ppgtt(vm);
1828           unsigned int first_entry = start >> PAGE_SHIFT;
1829           unsigned int pde = first_entry / GEN6_PTES;
1830           unsigned int pte = first_entry % GEN6_PTES;
1831           unsigned int num_entries = length >> PAGE_SHIFT;
1832           gen6_pte_t scratch_pte =
1833                     vm->pte_encode(vm->scratch_page.daddr, I915_CACHE_LLC, 0);
1834 
1835           while (num_entries) {
1836                     struct i915_page_table *pt = ppgtt->pd.page_table[pde++];
1837                     unsigned int end = min(pte + num_entries, GEN6_PTES);
1838                     gen6_pte_t *vaddr;
1839 
1840                     num_entries -= end - pte;
1841 
1842                     /* Note that the hw doesn't support removing PDE on the fly
1843                      * (they are cached inside the context with no means to
1844                      * invalidate the cache), so we can only reset the PTE
1845                      * entries back to scratch.
1846                      */
1847 
1848                     vaddr = kmap_atomic_px(pt);
1849                     do {
1850                               vaddr[pte++] = scratch_pte;
1851                     } while (pte < end);
1852                     kunmap_atomic(vaddr);
1853 
1854                     pte = 0;
1855           }
1856 }
1857 
/*
 * Write one PTE per page of @vma's DMA scatterlist into the gen6 page
 * tables, starting at the entry for vma->node.start and moving on to the
 * next page table whenever the current one is full.
 *
 * Each PTE is the encoding of @cache_level and @flags OR'ed with the encoded
 * DMA address. On return vma->page_sizes.gtt is set to I915_GTT_PAGE_SIZE.
 */
static void gen6_ppgtt_insert_entries(struct i915_address_space *vm,
				      struct i915_vma *vma,
				      enum i915_cache_level cache_level,
				      u32 flags)
{
	struct i915_hw_ppgtt *ppgtt = i915_vm_to_ppgtt(vm);
	const unsigned int first = vma->node.start >> PAGE_SHIFT;
	unsigned int pt_idx = first / GEN6_PTES;
	unsigned int pte_idx = first % GEN6_PTES;
	const u32 pte_bits = vm->pte_encode(0, cache_level, flags);
	struct sgt_dma iter = sgt_dma(vma);
	gen6_pte_t *vaddr;

	vaddr = kmap_atomic_px(ppgtt->pd.page_table[pt_idx]);
	for (;;) {
		vaddr[pte_idx] = pte_bits | GEN6_PTE_ADDR_ENCODE(iter.dma);

		/* Step to the next page, moving to the next sg entry if needed. */
		iter.dma += PAGE_SIZE;
		if (iter.dma == iter.max) {
			iter.sg = __sg_next(iter.sg);
			if (!iter.sg)
				break;

			iter.dma = sg_dma_address(iter.sg);
			iter.max = iter.dma + iter.sg->length;
		}

		/* Current page table exhausted: map the following one. */
		pte_idx++;
		if (pte_idx == GEN6_PTES) {
			kunmap_atomic(vaddr);
			pt_idx++;
			vaddr = kmap_atomic_px(ppgtt->pd.page_table[pt_idx]);
			pte_idx = 0;
		}
	}
	kunmap_atomic(vaddr);

	vma->page_sizes.gtt = I915_GTT_PAGE_SIZE;
}
1895 
/*
 * Make sure every page-directory slot covering [start, start + length) in
 * @vm points at its own page table instead of the shared scratch page table.
 *
 * A slot still referring to vm->scratch_pt gets a newly allocated and
 * initialised page table, which is recorded in ppgtt->pd.page_table[] and
 * written to the hardware PDE. If any PDE was written, the TLBs are marked
 * dirty and a write barrier is issued.
 *
 * Returns 0 on success, or -ENOMEM if a page table could not be allocated.
 * In that case the PTEs of the part of the range already walked are reset to
 * scratch.
 */
static int gen6_alloc_va_range(struct i915_address_space *vm,
			       u64 start, u64 length)
{
	struct i915_hw_ppgtt *ppgtt = i915_vm_to_ppgtt(vm);
	struct i915_page_table *pt;
	u64 from = start;
	unsigned int pde;
	bool flush = false;

	gen6_for_each_pde(pt, &ppgtt->pd, start, length, pde) {
		if (pt == vm->scratch_pt) {
			pt = alloc_pt(vm);
			if (IS_ERR(pt))
				goto unwind_out;

			gen6_initialize_pt(vm, pt);
			ppgtt->pd.page_table[pde] = pt;
			gen6_write_pde(ppgtt, pde, pt);
			flush = true;
		}
	}

	if (flush) {
		mark_tlbs_dirty(ppgtt);
		wmb();
	}

	return 0;

unwind_out:
	/*
	 * gen6_ppgtt_clear_range() takes a (start, length) pair. The iterator
	 * has moved @start forward from its initial value (saved in @from), so
	 * the span walked so far is start - from. Passing @start itself as the
	 * length would clear PTEs beyond that span whenever @from is non-zero.
	 */
	gen6_ppgtt_clear_range(vm, from, start - from);
	return -ENOMEM;
}
1929 
gen6_init_scratch(struct i915_address_space * vm)1930 static int gen6_init_scratch(struct i915_address_space *vm)
1931 {
1932           int ret;
1933 
1934           ret = setup_scratch_page(vm, I915_GFP_DMA);
1935           if (ret)
1936                     return ret;
1937 
1938           vm->scratch_pt = alloc_pt(vm);
1939           if (IS_ERR(vm->scratch_pt)) {
1940                     cleanup_scratch_page(vm);
1941                     return PTR_ERR(vm->scratch_pt);
1942           }
1943 
1944           gen6_initialize_pt(vm, vm->scratch_pt);
1945 
1946           return 0;
1947 }
1948 
gen6_free_scratch(struct i915_address_space * vm)1949 static void gen6_free_scratch(struct i915_address_space *vm)
1950 {
1951           free_pt(vm, vm->scratch_pt);
1952           cleanup_scratch_page(vm);
1953 }
1954 
gen6_ppgtt_cleanup(struct i915_address_space * vm)1955 static void gen6_ppgtt_cleanup(struct i915_address_space *vm)
1956 {
1957           struct i915_hw_ppgtt *ppgtt = i915_vm_to_ppgtt(vm);
1958           struct i915_page_directory *pd = &ppgtt->pd;
1959           struct i915_page_table *pt;
1960           u32 pde;
1961 
1962           drm_mm_remove_node(&ppgtt->node);
1963 
1964           gen6_for_all_pdes(pt, pd, pde)
1965                     if (pt != vm->scratch_pt)
1966                               free_pt(vm, pt);
1967 
1968           gen6_free_scratch(vm);
1969 }
1970 
gen6_ppgtt_allocate_page_directories(struct i915_hw_ppgtt * ppgtt)1971 static int gen6_ppgtt_allocate_page_directories(struct i915_hw_ppgtt *ppgtt)
1972 {
1973           struct i915_address_space *vm = &ppgtt->base;
1974           struct drm_i915_private *dev_priv = ppgtt->base.i915;
1975           struct i915_ggtt *ggtt = &dev_priv->ggtt;
1976           int ret;
1977 
1978           /* PPGTT PDEs reside in the GGTT and consists of 512 entries. The
1979            * allocator works in address space sizes, so it's multiplied by page
1980            * size. We allocate at the top of the GTT to avoid fragmentation.
1981            */
1982           BUG_ON(!drm_mm_initialized(&ggtt->base.mm));
1983 
1984           ret = gen6_init_scratch(vm);
1985           if (ret)
1986                     return ret;
1987 
1988           ret = i915_gem_gtt_insert(&ggtt->base, &ppgtt->node,
1989                                           GEN6_PD_SIZE, GEN6_PD_ALIGN,
1990                                           I915_COLOR_UNEVICTABLE,
1991                                           0, ggtt->base.total,
1992                                           PIN_HIGH);
1993           if (ret)
1994                     goto err_out;
1995 
1996           if (ppgtt->node.start < ggtt->mappable_end)
1997                     DRM_DEBUG("Forced to use aperture for PDEs\n");
1998 
1999           ppgtt->pd.base.ggtt_offset =
2000                     ppgtt->node.start / PAGE_SIZE * sizeof(gen6_pte_t);
2001 
2002           ppgtt->pd_addr = (gen6_pte_t __iomem *)ggtt->gsm +
2003                     ppgtt->pd.base.ggtt_offset / sizeof(gen6_pte_t);
2004 
2005           return 0;
2006 
2007 err_out:
2008           gen6_free_scratch(vm);
2009           return ret;
2010 }
2011 
/*
 * Allocate the backing for a gen6 ppgtt. This simply forwards to
 * gen6_ppgtt_allocate_page_directories() and returns its result.
 */
static int gen6_ppgtt_alloc(struct i915_hw_ppgtt *ppgtt)
{
	int ret = gen6_ppgtt_allocate_page_directories(ppgtt);

	return ret;
}
2016 
/*
 * Point every page-directory slot covering [start, start + length) at the
 * scratch page table. Only the software array is updated here; no PDE is
 * written.
 */
static void gen6_scratch_va_range(struct i915_hw_ppgtt *ppgtt,
				  u64 start, u64 length)
{
	struct i915_page_table *unused;
	u32 idx;

	gen6_for_each_pde(unused, &ppgtt->pd, start, length, idx) {
		ppgtt->pd.page_table[idx] = ppgtt->base.scratch_pt;
	}
}
2026 
gen6_ppgtt_init(struct i915_hw_ppgtt * ppgtt)2027 static int gen6_ppgtt_init(struct i915_hw_ppgtt *ppgtt)
2028 {
2029           struct drm_i915_private *dev_priv = ppgtt->base.i915;
2030           struct i915_ggtt *ggtt = &dev_priv->ggtt;
2031           int ret;
2032 
2033           ppgtt->base.pte_encode = ggtt->base.pte_encode;
2034           if (intel_vgpu_active(dev_priv) || IS_GEN6(dev_priv))
2035                     ppgtt->switch_mm = gen6_mm_switch;
2036           else if (IS_HASWELL(dev_priv))
2037                     ppgtt->switch_mm = hsw_mm_switch;
2038           else if (IS_GEN7(dev_priv))
2039                     ppgtt->switch_mm = gen7_mm_switch;
2040           else
2041                     BUG();
2042 
2043           ret = gen6_ppgtt_alloc(ppgtt);
2044           if (ret)
2045                     return ret;
2046 
2047           ppgtt->base.total = I915_PDES * GEN6_PTES * PAGE_SIZE;
2048 
2049           gen6_scratch_va_range(ppgtt, 0, ppgtt->base.total);
2050           gen6_write_page_range(ppgtt, 0, ppgtt->base.total);
2051 
2052           ret = gen6_alloc_va_range(&ppgtt->base, 0, ppgtt->base.total);
2053           if (ret) {
2054                     gen6_ppgtt_cleanup(&ppgtt->base);
2055                     return ret;
2056           }
2057 
2058           ppgtt->base.clear_range = gen6_ppgtt_clear_range;
2059           ppgtt->base.insert_entries = gen6_ppgtt_insert_entries;
2060           ppgtt->base.unbind_vma = ppgtt_unbind_vma;
2061           ppgtt->base.bind_vma = ppgtt_bind_vma;
2062           ppgtt->base.set_pages = ppgtt_set_pages;
2063           ppgtt->base.clear_pages = clear_pages;
2064           ppgtt->base.cleanup = gen6_ppgtt_cleanup;
2065           ppgtt->debug_dump = gen6_dump_ppgtt;
2066 
2067           DRM_DEBUG_DRIVER("Allocated pde space (%lldM) at GTT entry: %llx\n",
2068                                ppgtt->node.size >> 20,
2069                                ppgtt->node.start / PAGE_SIZE);
2070 
2071           DRM_DEBUG_DRIVER("Adding PPGTT at offset %x\n",
2072                                ppgtt->pd.base.ggtt_offset << 10);
2073 
2074           return 0;
2075 }
2076 
__hw_ppgtt_init(struct i915_hw_ppgtt * ppgtt,struct drm_i915_private * dev_priv)2077 static int __hw_ppgtt_init(struct i915_hw_ppgtt *ppgtt,
2078                                  struct drm_i915_private *dev_priv)
2079 {
2080           ppgtt->base.i915 = dev_priv;
2081           ppgtt->base.dma = &dev_priv->drm.pdev->dev;
2082 
2083           if (INTEL_INFO(dev_priv)->gen < 8)
2084                     return gen6_ppgtt_init(ppgtt);
2085           else
2086                     return gen8_ppgtt_init(ppgtt);
2087 }
2088 
i915_address_space_init(struct i915_address_space * vm,struct drm_i915_private * dev_priv,const char * name)2089 static void i915_address_space_init(struct i915_address_space *vm,
2090                                             struct drm_i915_private *dev_priv,
2091                                             const char *name)
2092 {
2093           i915_gem_timeline_init(dev_priv, &vm->timeline, name);
2094 
2095           drm_mm_init(&vm->mm, 0, vm->total);
2096           vm->mm.head_node.color = I915_COLOR_UNEVICTABLE;
2097 
2098           INIT_LIST_HEAD(&vm->active_list);
2099           INIT_LIST_HEAD(&vm->inactive_list);
2100           INIT_LIST_HEAD(&vm->unbound_list);
2101 
2102           list_add_tail(&vm->global_link, &dev_priv->vm_list);
2103           pagevec_init(&vm->free_pages);
2104 }
2105 
i915_address_space_fini(struct i915_address_space * vm)2106 static void i915_address_space_fini(struct i915_address_space *vm)
2107 {
2108           if (pagevec_count(&vm->free_pages))
2109                     vm_free_pages_release(vm, true);
2110 
2111           i915_gem_timeline_fini(&vm->timeline);
2112           drm_mm_takedown(&vm->mm);
2113           list_del(&vm->global_link);
2114 }
2115 
gtt_write_workarounds(struct drm_i915_private * dev_priv)2116 static void gtt_write_workarounds(struct drm_i915_private *dev_priv)
2117 {
2118           /* This function is for gtt related workarounds. This function is
2119            * called on driver load and after a GPU reset, so you can place
2120            * workarounds here even if they get overwritten by GPU reset.
2121            */
2122           /* WaIncreaseDefaultTLBEntries:chv,bdw,skl,bxt,kbl,glk,cfl,cnl */
2123           if (IS_BROADWELL(dev_priv))
2124                     I915_WRITE(GEN8_L3_LRA_1_GPGPU, GEN8_L3_LRA_1_GPGPU_DEFAULT_VALUE_BDW);
2125           else if (IS_CHERRYVIEW(dev_priv))
2126                     I915_WRITE(GEN8_L3_LRA_1_GPGPU, GEN8_L3_LRA_1_GPGPU_DEFAULT_VALUE_CHV);
2127           else if (IS_GEN9_BC(dev_priv) || IS_GEN10(dev_priv))
2128                     I915_WRITE(GEN8_L3_LRA_1_GPGPU, GEN9_L3_LRA_1_GPGPU_DEFAULT_VALUE_SKL);
2129           else if (IS_GEN9_LP(dev_priv))
2130                     I915_WRITE(GEN8_L3_LRA_1_GPGPU, GEN9_L3_LRA_1_GPGPU_DEFAULT_VALUE_BXT);
2131 
2132           /*
2133            * To support 64K PTEs we need to first enable the use of the
2134            * Intermediate-Page-Size(IPS) bit of the PDE field via some magical
2135            * mmio, otherwise the page-walker will simply ignore the IPS bit. This
2136            * shouldn't be needed after GEN10.
2137            *
2138            * 64K pages were first introduced from BDW+, although technically they
2139            * only *work* from gen9+. For pre-BDW we instead have the option for
2140            * 32K pages, but we don't currently have any support for it in our
2141            * driver.
2142            */
2143           if (HAS_PAGE_SIZES(dev_priv, I915_GTT_PAGE_SIZE_64K) &&
2144               INTEL_GEN(dev_priv) <= 10)
2145                     I915_WRITE(GEN8_GAMW_ECO_DEV_RW_IA,
2146                                  I915_READ(GEN8_GAMW_ECO_DEV_RW_IA) |
2147                                  GAMW_ECO_ENABLE_64K_IPS_FIELD);
2148 }
2149 
i915_ppgtt_init_hw(struct drm_i915_private * dev_priv)2150 int i915_ppgtt_init_hw(struct drm_i915_private *dev_priv)
2151 {
2152           gtt_write_workarounds(dev_priv);
2153 
2154           /* In the case of execlists, PPGTT is enabled by the context descriptor
2155            * and the PDPs are contained within the context itself.  We don't
2156            * need to do anything here. */
2157           if (i915_modparams.enable_execlists)
2158                     return 0;
2159 
2160           if (!USES_PPGTT(dev_priv))
2161                     return 0;
2162 
2163           if (IS_GEN6(dev_priv))
2164                     gen6_ppgtt_enable(dev_priv);
2165           else if (IS_GEN7(dev_priv))
2166                     gen7_ppgtt_enable(dev_priv);
2167           else if (INTEL_GEN(dev_priv) >= 8)
2168                     gen8_ppgtt_enable(dev_priv);
2169           else
2170                     MISSING_CASE(INTEL_GEN(dev_priv));
2171 
2172           return 0;
2173 }
2174 
2175 struct i915_hw_ppgtt *
i915_ppgtt_create(struct drm_i915_private * dev_priv,struct drm_i915_file_private * fpriv,const char * name)2176 i915_ppgtt_create(struct drm_i915_private *dev_priv,
2177                       struct drm_i915_file_private *fpriv,
2178                       const char *name)
2179 {
2180           struct i915_hw_ppgtt *ppgtt;
2181           int ret;
2182 
2183           ppgtt = kzalloc(sizeof(*ppgtt), GFP_KERNEL);
2184           if (!ppgtt)
2185                     return ERR_PTR(-ENOMEM);
2186 
2187           ret = __hw_ppgtt_init(ppgtt, dev_priv);
2188           if (ret) {
2189                     kfree(ppgtt);
2190                     return ERR_PTR(ret);
2191           }
2192 
2193           kref_init(&ppgtt->ref);
2194           i915_address_space_init(&ppgtt->base, dev_priv, name);
2195           ppgtt->base.file = fpriv;
2196 
2197           trace_i915_ppgtt_create(&ppgtt->base);
2198 
2199           return ppgtt;
2200 }
2201 
i915_ppgtt_close(struct i915_address_space * vm)2202 void i915_ppgtt_close(struct i915_address_space *vm)
2203 {
2204           struct list_head *phases[] = {
2205                     &vm->active_list,
2206                     &vm->inactive_list,
2207                     &vm->unbound_list,
2208                     NULL,
2209           }, **phase;
2210 
2211           GEM_BUG_ON(vm->closed);
2212           vm->closed = true;
2213 
2214           for (phase = phases; *phase; phase++) {
2215                     struct i915_vma *vma, *vn;
2216 
2217                     list_for_each_entry_safe(vma, vn, *phase, vm_link)
2218                               if (!i915_vma_is_closed(vma))
2219                                         i915_vma_close(vma);
2220           }
2221 }
2222 
i915_ppgtt_release(struct kref * kref)2223 void i915_ppgtt_release(struct kref *kref)
2224 {
2225           struct i915_hw_ppgtt *ppgtt =
2226                     container_of(kref, struct i915_hw_ppgtt, ref);
2227 
2228           trace_i915_ppgtt_release(&ppgtt->base);
2229 
2230           /* vmas should already be unbound and destroyed */
2231           WARN_ON(!list_empty(&ppgtt->base.active_list));
2232           WARN_ON(!list_empty(&ppgtt->base.inactive_list));
2233           WARN_ON(!list_empty(&ppgtt->base.unbound_list));
2234 
2235           ppgtt->base.cleanup(&ppgtt->base);
2236           i915_address_space_fini(&ppgtt->base);
2237           kfree(ppgtt);
2238 }
2239 
2240 /* Certain Gen5 chipsets require require idling the GPU before
2241  * unmapping anything from the GTT when VT-d is enabled.
2242  */
needs_idle_maps(struct drm_i915_private * dev_priv)2243 static bool needs_idle_maps(struct drm_i915_private *dev_priv)
2244 {
2245           /* Query intel_iommu to see if we need the workaround. Presumably that
2246            * was loaded first.
2247            */
2248           return IS_GEN5(dev_priv) && IS_MOBILE(dev_priv) && intel_vtd_active();
2249 }
2250 
i915_check_and_clear_faults(struct drm_i915_private * dev_priv)2251 void i915_check_and_clear_faults(struct drm_i915_private *dev_priv)
2252 {
2253           struct intel_engine_cs *engine;
2254           enum intel_engine_id id;
2255 
2256           if (INTEL_INFO(dev_priv)->gen < 6)
2257                     return;
2258 
2259           for_each_engine(engine, dev_priv, id) {
2260                     u32 fault_reg;
2261                     fault_reg = I915_READ(RING_FAULT_REG(engine));
2262                     if (fault_reg & RING_FAULT_VALID) {
2263                               DRM_DEBUG_DRIVER("Unexpected fault\n"
2264                                                    "\tAddr: 0x%08ux\n"
2265                                                    "\tAddress space: %s\n"
2266                                                    "\tSource ID: %d\n"
2267                                                    "\tType: %d\n",
2268                                                    fault_reg & LINUX_PAGE_MASK,
2269                                                    fault_reg & RING_FAULT_GTTSEL_MASK ? "GGTT" : "PPGTT",
2270                                                    RING_FAULT_SRCID(fault_reg),
2271                                                    RING_FAULT_FAULT_TYPE(fault_reg));
2272                               I915_WRITE(RING_FAULT_REG(engine),
2273                                            fault_reg & ~RING_FAULT_VALID);
2274                     }
2275           }
2276 
2277           /* Engine specific init may not have been done till this point. */
2278           if (dev_priv->engine[RCS])
2279                     POSTING_READ(RING_FAULT_REG(dev_priv->engine[RCS]));
2280 }
2281 
i915_gem_suspend_gtt_mappings(struct drm_i915_private * dev_priv)2282 void i915_gem_suspend_gtt_mappings(struct drm_i915_private *dev_priv)
2283 {
2284           struct i915_ggtt *ggtt = &dev_priv->ggtt;
2285 
2286           /* Don't bother messing with faults pre GEN6 as we have little
2287            * documentation supporting that it's a good idea.
2288            */
2289           if (INTEL_GEN(dev_priv) < 6)
2290                     return;
2291 
2292           i915_check_and_clear_faults(dev_priv);
2293 
2294           ggtt->base.clear_range(&ggtt->base, 0, ggtt->base.total);
2295 
2296           i915_ggtt_invalidate(dev_priv);
2297 }
2298 
i915_gem_gtt_prepare_pages(struct drm_i915_gem_object * obj,struct sg_table * pages)2299 int i915_gem_gtt_prepare_pages(struct drm_i915_gem_object *obj,
2300                                      struct sg_table *pages)
2301 {
2302           do {
2303                     if (dma_map_sg(&obj->base.dev->pdev->dev,
2304                                      pages->sgl, pages->nents,
2305                                      PCI_DMA_BIDIRECTIONAL))
2306                               return 0;
2307 
2308                     /* If the DMA remap fails, one cause can be that we have
2309                      * too many objects pinned in a small remapping table,
2310                      * such as swiotlb. Incrementally purge all other objects and
2311                      * try again - if there are no more pages to remove from
2312                      * the DMA remapper, i915_gem_shrink will return 0.
2313                      */
2314                     GEM_BUG_ON(obj->mm.pages == pages);
2315           } while (i915_gem_shrink(to_i915(obj->base.dev),
2316                                          obj->base.size >> PAGE_SHIFT, NULL,
2317                                          I915_SHRINK_BOUND |
2318                                          I915_SHRINK_UNBOUND |
2319                                          I915_SHRINK_ACTIVE));
2320 
2321           return -ENOSPC;
2322 }
2323 
/* Store a single gen8 PTE value at the given I/O address using writeq(). */
static void gen8_set_pte(void __iomem *addr, gen8_pte_t pte)
{
	writeq(pte, addr);
}
2328 
/*
 * Point the single GGTT entry covering @offset at the DMA address @addr,
 * encoded with cache level @level, then invalidate the GGTT.
 * The last parameter is ignored.
 */
static void gen8_ggtt_insert_page(struct i915_address_space *vm,
				  dma_addr_t addr,
				  u64 offset,
				  enum i915_cache_level level,
				  u32 unused)
{
	struct i915_ggtt *ggtt = i915_vm_to_ggtt(vm);
	gen8_pte_t __iomem *slot = (gen8_pte_t __iomem *)ggtt->gsm;

	/* One PTE per page: the byte offset becomes a PTE index. */
	slot += offset >> PAGE_SHIFT;

	gen8_set_pte(slot, gen8_pte_encode(addr, level));

	ggtt->invalidate(vm->i915);
}
2343 
/*
 * Write one gen8 GGTT entry for every DMA page of @vma->pages, starting
 * at the entry that corresponds to @vma->node.start, then invalidate the
 * GGTT. The last parameter is ignored.
 */
static void gen8_ggtt_insert_entries(struct i915_address_space *vm,
				     struct i915_vma *vma,
				     enum i915_cache_level level,
				     u32 unused)
{
	struct i915_ggtt *ggtt = i915_vm_to_ggtt(vm);
	/* Encode the cache-level bits once; the address is OR-ed in per page. */
	const gen8_pte_t pte_bits = gen8_pte_encode(0, level);
	gen8_pte_t __iomem *slot = (gen8_pte_t __iomem *)ggtt->gsm;
	struct sgt_iter iter;
	dma_addr_t daddr;

	slot += vma->node.start >> PAGE_SHIFT;

	for_each_sgt_dma(daddr, iter, vma->pages) {
		gen8_set_pte(slot, pte_bits | daddr);
		slot++;
	}

	wmb();

	/*
	 * We want to flush the TLBs only after we're certain all the PTE
	 * updates have finished, hence the barrier above.
	 */
	ggtt->invalidate(vm->i915);
}
2368 
/*
 * Point the single GGTT entry covering @offset at the DMA address @addr,
 * encoded through the address space's pte_encode() hook, then invalidate
 * the GGTT.
 */
static void gen6_ggtt_insert_page(struct i915_address_space *vm,
				  dma_addr_t addr,
				  u64 offset,
				  enum i915_cache_level level,
				  u32 flags)
{
	struct i915_ggtt *ggtt = i915_vm_to_ggtt(vm);
	gen6_pte_t __iomem *slot = (gen6_pte_t __iomem *)ggtt->gsm;

	/* One PTE per page: the byte offset becomes a PTE index. */
	slot += offset >> PAGE_SHIFT;

	iowrite32(vm->pte_encode(addr, level, flags), slot);

	ggtt->invalidate(vm->i915);
}
2383 
2384 /*
2385  * Binds an object into the global gtt with the specified cache level. The object
2386  * will be accessible to the GPU via commands whose operands reference offsets
2387  * within the global GTT as well as accessible by the GPU through the GMADR
2388  * mapped BAR (dev_priv->mm.gtt->gtt).
2389  */
gen6_ggtt_insert_entries(struct i915_address_space * vm,struct i915_vma * vma,enum i915_cache_level level,u32 flags)2390 static void gen6_ggtt_insert_entries(struct i915_address_space *vm,
2391                                              struct i915_vma *vma,
2392                                              enum i915_cache_level level,
2393                                              u32 flags)
2394 {
2395           struct i915_ggtt *ggtt = i915_vm_to_ggtt(vm);
2396           gen6_pte_t __iomem *entries = (gen6_pte_t __iomem *)ggtt->gsm;
2397           unsigned int i = vma->node.start >> PAGE_SHIFT;
2398           struct sgt_iter iter;
2399           dma_addr_t addr;
2400           for_each_sgt_dma(addr, iter, vma->pages)
2401                     iowrite32(vm->pte_encode(addr, level, flags), &entries[i++]);
2402           wmb();
2403 
2404           /* This next bit makes the above posting read even more important. We
2405            * want to flush the TLBs only after we're certain all the PTE updates
2406            * have finished.
2407            */
2408           ggtt->invalidate(vm->i915);
2409 }
2410 
/* clear_range() implementation that intentionally does nothing. */
static void nop_clear_range(struct i915_address_space *vm,
			    u64 start, u64 length)
{
}
2415 
/*
 * Point @length bytes worth of gen8 GGTT entries, starting at byte offset
 * @start, at the address space's scratch page.
 *
 * A request that extends past the end of the GGTT triggers a WARN and is
 * clamped to the entries that actually exist. All bookkeeping is unsigned:
 * a signed "max" would go negative when @start lies beyond the table, be
 * converted to a huge unsigned value in the comparison and let the loop
 * run off the end of the PTE array.
 */
static void gen8_ggtt_clear_range(struct i915_address_space *vm,
				  u64 start, u64 length)
{
	struct i915_ggtt *ggtt = i915_vm_to_ggtt(vm);
	unsigned int first_entry = start >> PAGE_SHIFT;
	unsigned int num_entries = length >> PAGE_SHIFT;
	const unsigned int total_entries = ggtt_total_entries(ggtt);
	/* Entries available from first_entry to the end of the table. */
	const unsigned int max_entries = first_entry < total_entries ?
		total_entries - first_entry : 0;
	const gen8_pte_t scratch_pte =
		gen8_pte_encode(vm->scratch_page.daddr, I915_CACHE_LLC);
	gen8_pte_t __iomem *gtt_base =
		(gen8_pte_t __iomem *)ggtt->gsm + first_entry;
	unsigned int i;

	if (WARN(num_entries > max_entries,
		 "First entry = %u; Num entries = %u (max=%u)\n",
		 first_entry, num_entries, max_entries))
		num_entries = max_entries;

	for (i = 0; i < num_entries; i++)
		gen8_set_pte(&gtt_base[i], scratch_pte);
}
2437 
bxt_vtd_ggtt_wa(struct i915_address_space * vm)2438 static void bxt_vtd_ggtt_wa(struct i915_address_space *vm)
2439 {
2440           struct drm_i915_private *dev_priv = vm->i915;
2441 
2442           /*
2443            * Make sure the internal GAM fifo has been cleared of all GTT
2444            * writes before exiting stop_machine(). This guarantees that
2445            * any aperture accesses waiting to start in another process
2446            * cannot back up behind the GTT writes causing a hang.
2447            * The register can be any arbitrary GAM register.
2448            */
2449           POSTING_READ(GFX_FLSH_CNTL_GEN6);
2450 }
2451 
2452 struct insert_page {
2453           struct i915_address_space *vm;
2454           dma_addr_t addr;
2455           u64 offset;
2456           enum i915_cache_level level;
2457 };
2458 
bxt_vtd_ggtt_insert_page__cb(void * _arg)2459 static int bxt_vtd_ggtt_insert_page__cb(void *_arg)
2460 {
2461           struct insert_page *arg = _arg;
2462 
2463           gen8_ggtt_insert_page(arg->vm, arg->addr, arg->offset, arg->level, 0);
2464           bxt_vtd_ggtt_wa(arg->vm);
2465 
2466           return 0;
2467 }
2468 
/*
 * insert_page() variant that performs the gen8 GGTT update from inside
 * stop_machine(). The last parameter is ignored.
 */
static void bxt_vtd_ggtt_insert_page__BKL(struct i915_address_space *vm,
					  dma_addr_t addr,
					  u64 offset,
					  enum i915_cache_level level,
					  u32 unused)
{
	struct insert_page arg = {
		.vm = vm,
		.addr = addr,
		.offset = offset,
		.level = level,
	};

	stop_machine(bxt_vtd_ggtt_insert_page__cb, &arg, NULL);
}
2479 
2480 struct insert_entries {
2481           struct i915_address_space *vm;
2482           struct i915_vma *vma;
2483           enum i915_cache_level level;
2484 };
2485 
bxt_vtd_ggtt_insert_entries__cb(void * _arg)2486 static int bxt_vtd_ggtt_insert_entries__cb(void *_arg)
2487 {
2488           struct insert_entries *arg = _arg;
2489 
2490           gen8_ggtt_insert_entries(arg->vm, arg->vma, arg->level, 0);
2491           bxt_vtd_ggtt_wa(arg->vm);
2492 
2493           return 0;
2494 }
2495 
/*
 * insert_entries() variant that performs the gen8 GGTT update from inside
 * stop_machine(). The last parameter is ignored.
 */
static void bxt_vtd_ggtt_insert_entries__BKL(struct i915_address_space *vm,
					     struct i915_vma *vma,
					     enum i915_cache_level level,
					     u32 unused)
{
	struct insert_entries arg = {
		.vm = vm,
		.vma = vma,
		.level = level,
	};

	stop_machine(bxt_vtd_ggtt_insert_entries__cb, &arg, NULL);
}
2505 
2506 struct clear_range {
2507           struct i915_address_space *vm;
2508           u64 start;
2509           u64 length;
2510 };
2511 
bxt_vtd_ggtt_clear_range__cb(void * _arg)2512 static int bxt_vtd_ggtt_clear_range__cb(void *_arg)
2513 {
2514           struct clear_range *arg = _arg;
2515 
2516           gen8_ggtt_clear_range(arg->vm, arg->start, arg->length);
2517           bxt_vtd_ggtt_wa(arg->vm);
2518 
2519           return 0;
2520 }
2521 
/*
 * clear_range() variant that performs the gen8 GGTT update from inside
 * stop_machine().
 */
static void bxt_vtd_ggtt_clear_range__BKL(struct i915_address_space *vm,
					  u64 start,
					  u64 length)
{
	struct clear_range arg = {
		.vm = vm,
		.start = start,
		.length = length,
	};

	stop_machine(bxt_vtd_ggtt_clear_range__cb, &arg, NULL);
}
2530 
/*
 * Point @length bytes worth of gen6 GGTT entries, starting at byte offset
 * @start, at the address space's scratch page.
 *
 * A request that extends past the end of the GGTT triggers a WARN and is
 * clamped to the entries that actually exist. All bookkeeping is unsigned:
 * a signed "max" would go negative when @start lies beyond the table, be
 * converted to a huge unsigned value in the comparison and let the loop
 * run off the end of the PTE array.
 */
static void gen6_ggtt_clear_range(struct i915_address_space *vm,
				  u64 start, u64 length)
{
	struct i915_ggtt *ggtt = i915_vm_to_ggtt(vm);
	unsigned int first_entry = start >> PAGE_SHIFT;
	unsigned int num_entries = length >> PAGE_SHIFT;
	const unsigned int total_entries = ggtt_total_entries(ggtt);
	/* Entries available from first_entry to the end of the table. */
	const unsigned int max_entries = first_entry < total_entries ?
		total_entries - first_entry : 0;
	gen6_pte_t scratch_pte, __iomem *gtt_base =
		(gen6_pte_t __iomem *)ggtt->gsm + first_entry;
	unsigned int i;

	if (WARN(num_entries > max_entries,
		 "First entry = %u; Num entries = %u (max=%u)\n",
		 first_entry, num_entries, max_entries))
		num_entries = max_entries;

	scratch_pte = vm->pte_encode(vm->scratch_page.daddr,
				     I915_CACHE_LLC, 0);

	for (i = 0; i < num_entries; i++)
		iowrite32(scratch_pte, &gtt_base[i]);
}
2553 
/*
 * insert_page() implementation that delegates to intel_gtt_insert_page().
 * I915_CACHE_NONE maps to AGP_USER_MEMORY, every other cache level to
 * AGP_USER_CACHED_MEMORY. The last parameter is ignored.
 */
static void i915_ggtt_insert_page(struct i915_address_space *vm,
				  dma_addr_t addr,
				  u64 offset,
				  enum i915_cache_level cache_level,
				  u32 unused)
{
	unsigned int flags;

	if (cache_level == I915_CACHE_NONE)
		flags = AGP_USER_MEMORY;
	else
		flags = AGP_USER_CACHED_MEMORY;

	intel_gtt_insert_page(addr, offset >> PAGE_SHIFT, flags);
}
2565 
/*
 * insert_entries() implementation that delegates to
 * intel_gtt_insert_sg_entries(). I915_CACHE_NONE maps to AGP_USER_MEMORY,
 * every other cache level to AGP_USER_CACHED_MEMORY. The last parameter
 * is ignored.
 */
static void i915_ggtt_insert_entries(struct i915_address_space *vm,
				     struct i915_vma *vma,
				     enum i915_cache_level cache_level,
				     u32 unused)
{
	unsigned int flags;

	if (cache_level == I915_CACHE_NONE)
		flags = AGP_USER_MEMORY;
	else
		flags = AGP_USER_CACHED_MEMORY;

	intel_gtt_insert_sg_entries(vma->pages, vma->node.start >> PAGE_SHIFT,
				    flags);
}
2577 
/*
 * clear_range() implementation that delegates to intel_gtt_clear_range(),
 * converting the byte offset and byte length into page counts.
 */
static void i915_ggtt_clear_range(struct i915_address_space *vm,
				  u64 start, u64 length)
{
	intel_gtt_clear_range(start >> PAGE_SHIFT,
			      length >> PAGE_SHIFT);
}
2583 
/*
 * bind_vma() hook for the global GTT when no aliasing PPGTT is in use.
 *
 * Writes the vma's entries through the address space's insert_entries()
 * hook while holding a runtime-pm reference. The @flags argument is not
 * consulted. Always returns 0.
 */
static int ggtt_bind_vma(struct i915_vma *vma,
			 enum i915_cache_level cache_level,
			 u32 flags)
{
	struct i915_address_space *vm = vma->vm;
	struct drm_i915_private *i915 = vm->i915;
	/* Currently applicable only to VLV */
	u32 pte_flags = vma->obj->gt_ro ? PTE_READ_ONLY : 0;

	intel_runtime_pm_get(i915);
	vm->insert_entries(vm, vma, cache_level, pte_flags);
	intel_runtime_pm_put(i915);

	vma->page_sizes.gtt = I915_GTT_PAGE_SIZE;

	/*
	 * Without aliasing PPGTT there's no difference between
	 * GLOBAL/LOCAL_BIND, it's all the same ptes. Hence unconditionally
	 * upgrade to both bound if we bind either to avoid double-binding.
	 */
	vma->flags |= I915_VMA_GLOBAL_BIND | I915_VMA_LOCAL_BIND;

	return 0;
}
2612 
ggtt_unbind_vma(struct i915_vma * vma)2613 static void ggtt_unbind_vma(struct i915_vma *vma)
2614 {
2615           struct drm_i915_private *i915 = vma->vm->i915;
2616 
2617           intel_runtime_pm_get(i915);
2618           vma->vm->clear_range(vma->vm, vma->node.start, vma->size);
2619           intel_runtime_pm_put(i915);
2620 }
2621 
/*
 * bind_vma() hook used once an aliasing PPGTT has been installed.
 *
 * I915_VMA_LOCAL_BIND in @flags inserts the vma into the aliasing PPGTT,
 * allocating its VA range first if the vma is not yet locally bound and
 * the PPGTT provides allocate_va_range(). I915_VMA_GLOBAL_BIND inserts
 * it into the global GTT under a runtime-pm reference.
 *
 * Returns 0 on success or the error from allocate_va_range().
 */
static int aliasing_gtt_bind_vma(struct i915_vma *vma,
				 enum i915_cache_level cache_level,
				 u32 flags)
{
	struct drm_i915_private *i915 = vma->vm->i915;
	/* Currently applicable only to VLV */
	u32 pte_flags = vma->obj->gt_ro ? PTE_READ_ONLY : 0;

	if (flags & I915_VMA_LOCAL_BIND) {
		struct i915_address_space *alias =
			&i915->mm.aliasing_ppgtt->base;
		const bool already_local = vma->flags & I915_VMA_LOCAL_BIND;

		if (!already_local && alias->allocate_va_range) {
			int err = alias->allocate_va_range(alias,
							   vma->node.start,
							   vma->size);
			if (err)
				return err;
		}

		alias->insert_entries(alias, vma, cache_level, pte_flags);
	}

	if (flags & I915_VMA_GLOBAL_BIND) {
		intel_runtime_pm_get(i915);
		vma->vm->insert_entries(vma->vm, vma, cache_level, pte_flags);
		intel_runtime_pm_put(i915);
	}

	return 0;
}
2659 
aliasing_gtt_unbind_vma(struct i915_vma * vma)2660 static void aliasing_gtt_unbind_vma(struct i915_vma *vma)
2661 {
2662           struct drm_i915_private *i915 = vma->vm->i915;
2663 
2664           if (vma->flags & I915_VMA_GLOBAL_BIND) {
2665                     intel_runtime_pm_get(i915);
2666                     vma->vm->clear_range(vma->vm, vma->node.start, vma->size);
2667                     intel_runtime_pm_put(i915);
2668           }
2669 
2670           if (vma->flags & I915_VMA_LOCAL_BIND) {
2671                     struct i915_address_space *vm = &i915->mm.aliasing_ppgtt->base;
2672 
2673                     vm->clear_range(vm, vma->node.start, vma->size);
2674           }
2675 }
2676 
i915_gem_gtt_finish_pages(struct drm_i915_gem_object * obj,struct sg_table * pages)2677 void i915_gem_gtt_finish_pages(struct drm_i915_gem_object *obj,
2678                                      struct sg_table *pages)
2679 {
2680           struct drm_i915_private *dev_priv = to_i915(obj->base.dev);
2681           struct device *kdev = &dev_priv->drm.pdev->dev;
2682           struct i915_ggtt *ggtt = &dev_priv->ggtt;
2683 
2684           if (unlikely(ggtt->do_idle_maps)) {
2685                     if (i915_gem_wait_for_idle(dev_priv, 0)) {
2686                               DRM_ERROR("Failed to wait for idle; VT'd may hang.\n");
2687                               /* Wait a bit, in hopes it avoids the hang */
2688                               udelay(10);
2689                     }
2690           }
2691 
2692           dma_unmap_sg(kdev, pages->sgl, pages->nents, PCI_DMA_BIDIRECTIONAL);
2693 }
2694 
ggtt_set_pages(struct i915_vma * vma)2695 static int ggtt_set_pages(struct i915_vma *vma)
2696 {
2697           int ret;
2698 
2699           GEM_BUG_ON(vma->pages);
2700 
2701           ret = i915_get_ggtt_vma_pages(vma);
2702           if (ret)
2703                     return ret;
2704 
2705           vma->page_sizes = vma->obj->mm.page_sizes;
2706 
2707           return 0;
2708 }
2709 
/*
 * drm_mm color-adjust callback: shrink the candidate range [*start, *end)
 * by one GTT page on each side that borders a differently coloured node.
 */
static void i915_gtt_color_adjust(const struct drm_mm_node *node,
				  unsigned long color,
				  u64 *start,
				  u64 *end)
{
	const struct drm_mm_node *next;

	if (node->allocated && node->color != color)
		*start += I915_GTT_PAGE_SIZE;

	/*
	 * Also leave a space between the unallocated reserved node after the
	 * GTT and any objects within the GTT, i.e. we use the color adjustment
	 * to insert a guard page to prevent prefetches crossing over the
	 * GTT boundary.
	 */
	next = list_next_entry(node, node_list);
	if (next->color != color)
		*end -= I915_GTT_PAGE_SIZE;
}
2727 
i915_gem_init_aliasing_ppgtt(struct drm_i915_private * i915)2728 int i915_gem_init_aliasing_ppgtt(struct drm_i915_private *i915)
2729 {
2730           struct i915_ggtt *ggtt = &i915->ggtt;
2731           struct i915_hw_ppgtt *ppgtt;
2732           int err;
2733 
2734           ppgtt = i915_ppgtt_create(i915, ERR_PTR(-EPERM), "[alias]");
2735           if (IS_ERR(ppgtt))
2736                     return PTR_ERR(ppgtt);
2737 
2738           if (WARN_ON(ppgtt->base.total < ggtt->base.total)) {
2739                     err = -ENODEV;
2740                     goto err_ppgtt;
2741           }
2742 
2743           if (ppgtt->base.allocate_va_range) {
2744                     /* Note we only pre-allocate as far as the end of the global
2745                      * GTT. On 48b / 4-level page-tables, the difference is very,
2746                      * very significant! We have to preallocate as GVT/vgpu does
2747                      * not like the page directory disappearing.
2748                      */
2749                     err = ppgtt->base.allocate_va_range(&ppgtt->base,
2750                                                                 0, ggtt->base.total);
2751                     if (err)
2752                               goto err_ppgtt;
2753           }
2754 
2755           i915->mm.aliasing_ppgtt = ppgtt;
2756 
2757           WARN_ON(ggtt->base.bind_vma != ggtt_bind_vma);
2758           ggtt->base.bind_vma = aliasing_gtt_bind_vma;
2759 
2760           WARN_ON(ggtt->base.unbind_vma != ggtt_unbind_vma);
2761           ggtt->base.unbind_vma = aliasing_gtt_unbind_vma;
2762 
2763           return 0;
2764 
2765 err_ppgtt:
2766           i915_ppgtt_put(ppgtt);
2767           return err;
2768 }
2769 
i915_gem_fini_aliasing_ppgtt(struct drm_i915_private * i915)2770 void i915_gem_fini_aliasing_ppgtt(struct drm_i915_private *i915)
2771 {
2772           struct i915_ggtt *ggtt = &i915->ggtt;
2773           struct i915_hw_ppgtt *ppgtt;
2774 
2775           ppgtt = fetch_and_zero(&i915->mm.aliasing_ppgtt);
2776           if (!ppgtt)
2777                     return;
2778 
2779           i915_ppgtt_put(ppgtt);
2780 
2781           ggtt->base.bind_vma = ggtt_bind_vma;
2782           ggtt->base.unbind_vma = ggtt_unbind_vma;
2783 }
2784 
i915_gem_init_ggtt(struct drm_i915_private * dev_priv)2785 int i915_gem_init_ggtt(struct drm_i915_private *dev_priv)
2786 {
2787           /* Let GEM Manage all of the aperture.
2788            *
2789            * However, leave one page at the end still bound to the scratch page.
2790            * There are a number of places where the hardware apparently prefetches
2791            * past the end of the object, and we've seen multiple hangs with the
2792            * GPU head pointer stuck in a batchbuffer bound at the last page of the
2793            * aperture.  One page should be enough to keep any prefetching inside
2794            * of the aperture.
2795            */
2796           struct i915_ggtt *ggtt = &dev_priv->ggtt;
2797           unsigned long hole_start, hole_end;
2798           struct drm_mm_node *entry;
2799           int ret;
2800           unsigned long mappable = min(ggtt->base.total, ggtt->mappable_end);
2801 
2802           ret = intel_vgt_balloon(dev_priv);
2803           if (ret)
2804                     return ret;
2805 
2806           /* Reserve a mappable slot for our lockless error capture */
2807           ret = drm_mm_insert_node_in_range(&ggtt->base.mm, &ggtt->error_capture,
2808                                                     PAGE_SIZE, 0, I915_COLOR_UNEVICTABLE,
2809                                                     0, ggtt->mappable_end,
2810                                                     DRM_MM_INSERT_LOW);
2811           if (ret)
2812                     return ret;
2813 
2814           /* Clear any non-preallocated blocks */
2815           drm_mm_for_each_hole(entry, &ggtt->base.mm, hole_start, hole_end) {
2816                     DRM_DEBUG_KMS("clearing unused GTT space: [%lx, %lx]\n",
2817                                     hole_start, hole_end);
2818                     ggtt->base.clear_range(&ggtt->base, hole_start,
2819                                                hole_end - hole_start);
2820           }
2821 
2822 #ifdef __DragonFly__
2823           DRM_INFO("taking over the fictitious range 0x%llx-0x%llx\n",
2824               dev_priv->ggtt.mappable_base, dev_priv->ggtt.mappable_end);
2825           vm_phys_fictitious_reg_range(dev_priv->ggtt.mappable_base,
2826                dev_priv->ggtt.mappable_base + mappable, VM_MEMATTR_WRITE_COMBINING);
2827 #endif
2828 
2829           /* And finally clear the reserved guard page */
2830           ggtt->base.clear_range(&ggtt->base,
2831                                      ggtt->base.total - PAGE_SIZE, PAGE_SIZE);
2832 
2833           if (USES_PPGTT(dev_priv) && !USES_FULL_PPGTT(dev_priv)) {
2834                     ret = i915_gem_init_aliasing_ppgtt(dev_priv);
2835                     if (ret)
2836                               goto err;
2837           }
2838 
2839           return 0;
2840 
2841 err:
2842           drm_mm_remove_node(&ggtt->error_capture);
2843           return ret;
2844 }
2845 
2846 /**
2847  * i915_ggtt_cleanup_hw - Clean up GGTT hardware initialization
2848  * @dev_priv: i915 device
2849  */
i915_ggtt_cleanup_hw(struct drm_i915_private * dev_priv)2850 void i915_ggtt_cleanup_hw(struct drm_i915_private *dev_priv)
2851 {
2852           struct i915_ggtt *ggtt = &dev_priv->ggtt;
2853           struct i915_vma *vma, *vn;
2854           struct pagevec *pvec;
2855 
2856           ggtt->base.closed = true;
2857 
2858           mutex_lock(&dev_priv->drm.struct_mutex);
2859           WARN_ON(!list_empty(&ggtt->base.active_list));
2860           list_for_each_entry_safe(vma, vn, &ggtt->base.inactive_list, vm_link)
2861                     WARN_ON(i915_vma_unbind(vma));
2862           mutex_unlock(&dev_priv->drm.struct_mutex);
2863 
2864           i915_gem_cleanup_stolen(&dev_priv->drm);
2865 
2866           mutex_lock(&dev_priv->drm.struct_mutex);
2867           i915_gem_fini_aliasing_ppgtt(dev_priv);
2868 
2869           if (drm_mm_node_allocated(&ggtt->error_capture))
2870                     drm_mm_remove_node(&ggtt->error_capture);
2871 
2872           if (drm_mm_initialized(&ggtt->base.mm)) {
2873                     intel_vgt_deballoon(dev_priv);
2874                     i915_address_space_fini(&ggtt->base);
2875           }
2876 
2877           ggtt->base.cleanup(&ggtt->base);
2878 
2879           pvec = &dev_priv->mm.wc_stash;
2880           if (pvec->nr) {
2881                     set_pages_array_wb(pvec->pages, pvec->nr);
2882                     __pagevec_release(pvec);
2883           }
2884 
2885           mutex_unlock(&dev_priv->drm.struct_mutex);
2886 
2887           arch_phys_wc_del(ggtt->mtrr);
2888           io_mapping_fini(&ggtt->mappable);
2889 }
2890 
/*
 * Decode the GGMS field of the SNB GMCH control word and return the size
 * it encodes, i.e. the field value shifted left by 20.
 */
static unsigned int gen6_get_total_gtt_size(u16 snb_gmch_ctl)
{
	const u16 ggms =
		(snb_gmch_ctl >> SNB_GMCH_GGMS_SHIFT) & SNB_GMCH_GGMS_MASK;

	return ggms << 20;
}
2897 
/*
 * Decode the GGMS field of the BDW GMCH control word. A non-zero field
 * value n encodes (1 << n), which is then shifted left by 20; a zero field
 * yields 0.
 */
static unsigned int gen8_get_total_gtt_size(u16 bdw_gmch_ctl)
{
	const u16 ggms =
		(bdw_gmch_ctl >> BDW_GMCH_GGMS_SHIFT) & BDW_GMCH_GGMS_MASK;
	u16 size_mb = 0;

	if (ggms)
		size_mb = 1 << ggms;

#ifdef CONFIG_X86_32
	/* Limit 32b platforms to a 2GB GGTT: 4 << 20 / pte size * PAGE_SIZE */
	if (size_mb > 4)
		size_mb = 4;
#endif

	return size_mb << 20;
}
2913 
/*
 * Decode the GGMS field (SNB layout) of the GMCH control word. A non-zero
 * field value n yields 1 << (20 + n); a zero field yields 0.
 */
static unsigned int chv_get_total_gtt_size(u16 gmch_ctrl)
{
	const u16 ggms =
		(gmch_ctrl >> SNB_GMCH_GGMS_SHIFT) & SNB_GMCH_GGMS_MASK;

	if (!ggms)
		return 0;

	return 1 << (20 + ggms);
}
2924 
gen6_get_stolen_size(u16 snb_gmch_ctl)2925 static size_t gen6_get_stolen_size(u16 snb_gmch_ctl)
2926 {
2927           snb_gmch_ctl >>= SNB_GMCH_GMS_SHIFT;
2928           snb_gmch_ctl &= SNB_GMCH_GMS_MASK;
2929           return (size_t)snb_gmch_ctl << 25; /* 32 MB units */
2930 }
2931 
gen8_get_stolen_size(u16 bdw_gmch_ctl)2932 static size_t gen8_get_stolen_size(u16 bdw_gmch_ctl)
2933 {
2934           bdw_gmch_ctl >>= BDW_GMCH_GMS_SHIFT;
2935           bdw_gmch_ctl &= BDW_GMCH_GMS_MASK;
2936           return (size_t)bdw_gmch_ctl << 25; /* 32 MB units */
2937 }
2938 
chv_get_stolen_size(u16 gmch_ctrl)2939 static size_t chv_get_stolen_size(u16 gmch_ctrl)
2940 {
2941           gmch_ctrl >>= SNB_GMCH_GMS_SHIFT;
2942           gmch_ctrl &= SNB_GMCH_GMS_MASK;
2943 
2944           /*
2945            * 0x0  to 0x10: 32MB increments starting at 0MB
2946            * 0x11 to 0x16: 4MB increments starting at 8MB
2947            * 0x17 to 0x1d: 4MB increments start at 36MB
2948            */
2949           if (gmch_ctrl < 0x11)
2950                     return (size_t)gmch_ctrl << 25;
2951           else if (gmch_ctrl < 0x17)
2952                     return (size_t)(gmch_ctrl - 0x11 + 2) << 22;
2953           else
2954                     return (size_t)(gmch_ctrl - 0x17 + 9) << 22;
2955 }
2956 
gen9_get_stolen_size(u16 gen9_gmch_ctl)2957 static size_t gen9_get_stolen_size(u16 gen9_gmch_ctl)
2958 {
2959           gen9_gmch_ctl >>= BDW_GMCH_GMS_SHIFT;
2960           gen9_gmch_ctl &= BDW_GMCH_GMS_MASK;
2961 
2962           if (gen9_gmch_ctl < 0xf0)
2963                     return (size_t)gen9_gmch_ctl << 25; /* 32 MB units */
2964           else
2965                     /* 4MB increments starting at 0xf0 for 4MB */
2966                     return (size_t)(gen9_gmch_ctl - 0xf0 + 1) << 22;
2967 }
2968 
/*
 * Map the GGTT page table and set up the scratch page.
 *
 * @ggtt: GGTT being probed; ggtt->gsm receives the mapping
 * @size: number of bytes of page table to map
 *
 * Returns 0 on success, -ENOMEM if the page table cannot be mapped, or the
 * error reported by setup_scratch_page() (the mapping is released again in
 * that case).
 */
static int ggtt_probe_common(struct i915_ggtt *ggtt, u64 size)
{
	struct drm_i915_private *dev_priv = ggtt->base.i915;
	struct pci_dev *pdev = dev_priv->drm.pdev;
	phys_addr_t gsm_base;
	int err;

	/*
	 * For Modern GENs the PTEs and register space are split in the BAR:
	 * the page table starts halfway into BAR 0.
	 */
	gsm_base = pci_resource_start(pdev, 0) + pci_resource_len(pdev, 0) / 2;

	/*
	 * On BXT+/CNL+ writes larger than 64 bit to the GTT pagetable range
	 * will be dropped. For WC mappings in general we have 64 byte burst
	 * writes when the WC buffer is flushed, so we can't use it, but have to
	 * resort to an uncached mapping. The WC issue is easily caught by the
	 * readback check when writing GTT PTE entries.
	 */
	if (IS_GEN9_LP(dev_priv) || INTEL_GEN(dev_priv) >= 10)
		ggtt->gsm = ioremap_nocache(gsm_base, size);
	else
		ggtt->gsm = ioremap_wc(gsm_base, size);

	if (!ggtt->gsm) {
		DRM_ERROR("Failed to map the ggtt page table\n");
		return -ENOMEM;
	}

	err = setup_scratch_page(&ggtt->base, GFP_DMA32);
	if (!err)
		return 0;

	DRM_ERROR("Scratch setup failed\n");
	/* iounmap will also get called at remove, but meh */
	iounmap(ggtt->gsm);

	return err;
}
3005 
3006 static struct intel_ppat_entry *
__alloc_ppat_entry(struct intel_ppat * ppat,unsigned int index,u8 value)3007 __alloc_ppat_entry(struct intel_ppat *ppat, unsigned int index, u8 value)
3008 {
3009           struct intel_ppat_entry *entry = &ppat->entries[index];
3010 
3011           GEM_BUG_ON(index >= ppat->max_entries);
3012           GEM_BUG_ON(test_bit(index, ppat->used));
3013 
3014           entry->ppat = ppat;
3015           entry->value = value;
3016           kref_init(&entry->ref);
3017           set_bit(index, ppat->used);
3018           set_bit(index, ppat->dirty);
3019 
3020           return entry;
3021 }
3022 
__free_ppat_entry(struct intel_ppat_entry * entry)3023 static void __free_ppat_entry(struct intel_ppat_entry *entry)
3024 {
3025           struct intel_ppat *ppat = entry->ppat;
3026           unsigned int index = entry - ppat->entries;
3027 
3028           GEM_BUG_ON(index >= ppat->max_entries);
3029           GEM_BUG_ON(!test_bit(index, ppat->used));
3030 
3031           entry->value = ppat->clear_value;
3032           clear_bit(index, ppat->used);
3033           set_bit(index, ppat->dirty);
3034 }
3035 
3036 /**
3037  * intel_ppat_get - get a usable PPAT entry
3038  * @i915: i915 device instance
3039  * @value: the PPAT value required by the caller
3040  *
3041  * The function tries to search if there is an existing PPAT entry which
3042  * matches with the required value. If perfectly matched, the existing PPAT
3043  * entry will be used. If only partially matched, it will try to check if
3044  * there is any available PPAT index. If yes, it will allocate a new PPAT
3045  * index for the required entry and update the HW. If not, the partially
3046  * matched entry will be used.
3047  */
3048 const struct intel_ppat_entry *
intel_ppat_get(struct drm_i915_private * i915,u8 value)3049 intel_ppat_get(struct drm_i915_private *i915, u8 value)
3050 {
3051           struct intel_ppat *ppat = &i915->ppat;
3052           struct intel_ppat_entry *entry;
3053           unsigned int scanned, best_score;
3054           int i;
3055 
3056           GEM_BUG_ON(!ppat->max_entries);
3057 
3058           scanned = best_score = 0;
3059           for_each_set_bit(i, ppat->used, ppat->max_entries) {
3060                     unsigned int score;
3061 
3062                     score = ppat->match(ppat->entries[i].value, value);
3063                     if (score > best_score) {
3064                               entry = &ppat->entries[i];
3065                               if (score == INTEL_PPAT_PERFECT_MATCH) {
3066                                         kref_get(&entry->ref);
3067                                         return entry;
3068                               }
3069                               best_score = score;
3070                     }
3071                     scanned++;
3072           }
3073 
3074           if (scanned == ppat->max_entries) {
3075                     if (!best_score)
3076                               return ERR_PTR(-ENOSPC);
3077 
3078                     kref_get(&entry->ref);
3079                     return entry;
3080           }
3081 
3082           i = find_first_zero_bit(ppat->used, ppat->max_entries);
3083           entry = __alloc_ppat_entry(ppat, i, value);
3084           ppat->update_hw(i915);
3085           return entry;
3086 }
3087 
release_ppat(struct kref * kref)3088 static void release_ppat(struct kref *kref)
3089 {
3090           struct intel_ppat_entry *entry =
3091                     container_of(kref, struct intel_ppat_entry, ref);
3092           struct drm_i915_private *i915 = entry->ppat->i915;
3093 
3094           __free_ppat_entry(entry);
3095           entry->ppat->update_hw(i915);
3096 }
3097 
3098 /**
3099  * intel_ppat_put - put back the PPAT entry got from intel_ppat_get()
3100  * @entry: an intel PPAT entry
3101  *
3102  * Put back the PPAT entry got from intel_ppat_get(). If the PPAT index of the
3103  * entry is dynamically allocated, its reference count will be decreased. Once
3104  * the reference count becomes into zero, the PPAT index becomes free again.
3105  */
intel_ppat_put(const struct intel_ppat_entry * entry)3106 void intel_ppat_put(const struct intel_ppat_entry *entry)
3107 {
3108           struct intel_ppat *ppat = entry->ppat;
3109           unsigned int index = entry - ppat->entries;
3110 
3111           GEM_BUG_ON(!ppat->max_entries);
3112 
3113           kref_put(&ppat->entries[index].ref, release_ppat);
3114 }
3115 
cnl_private_pat_update_hw(struct drm_i915_private * dev_priv)3116 static void cnl_private_pat_update_hw(struct drm_i915_private *dev_priv)
3117 {
3118           struct intel_ppat *ppat = &dev_priv->ppat;
3119           int i;
3120 
3121           for_each_set_bit(i, ppat->dirty, ppat->max_entries) {
3122                     I915_WRITE(GEN10_PAT_INDEX(i), ppat->entries[i].value);
3123                     clear_bit(i, ppat->dirty);
3124           }
3125 }
3126 
bdw_private_pat_update_hw(struct drm_i915_private * dev_priv)3127 static void bdw_private_pat_update_hw(struct drm_i915_private *dev_priv)
3128 {
3129           struct intel_ppat *ppat = &dev_priv->ppat;
3130           u64 pat = 0;
3131           int i;
3132 
3133           for (i = 0; i < ppat->max_entries; i++)
3134                     pat |= GEN8_PPAT(i, ppat->entries[i].value);
3135 
3136           bitmap_clear(ppat->dirty, 0, ppat->max_entries);
3137 
3138           I915_WRITE(GEN8_PRIVATE_PAT_LO, lower_32_bits(pat));
3139           I915_WRITE(GEN8_PRIVATE_PAT_HI, upper_32_bits(pat));
3140 }
3141 
/*
 * Score how well the PPAT value @src satisfies a request for @dst.
 *
 * The cache attribute must be identical, otherwise the score is 0 (no match).
 * Beyond that, a matching target cache and a matching age each add one bit
 * to the score; when both match, INTEL_PPAT_PERFECT_MATCH is returned.
 */
static unsigned int bdw_private_pat_match(u8 src, u8 dst)
{
	enum {
		AGE_MATCH = BIT(0),
		TC_MATCH = BIT(1),
		CA_MATCH = BIT(2),
	};
	bool same_tc, same_age;

	/* Cache attribute has to be matched. */
	if (GEN8_PPAT_GET_CA(src) != GEN8_PPAT_GET_CA(dst))
		return 0;

	same_tc = GEN8_PPAT_GET_TC(src) == GEN8_PPAT_GET_TC(dst);
	same_age = GEN8_PPAT_GET_AGE(src) == GEN8_PPAT_GET_AGE(dst);

	if (same_tc && same_age)
		return INTEL_PPAT_PERFECT_MATCH;

	return CA_MATCH |
	       (same_tc ? TC_MATCH : 0) |
	       (same_age ? AGE_MATCH : 0);
}
3168 
/*
 * CHV matching compares only the snoop bit: equal snoop bits are a perfect
 * match, anything else is no match at all.
 */
static unsigned int chv_private_pat_match(u8 src, u8 dst)
{
	if (CHV_PPAT_GET_SNOOP(src) != CHV_PPAT_GET_SNOOP(dst))
		return 0;

	return INTEL_PPAT_PERFECT_MATCH;
}
3174 
cnl_setup_private_ppat(struct intel_ppat * ppat)3175 static void cnl_setup_private_ppat(struct intel_ppat *ppat)
3176 {
3177           ppat->max_entries = 8;
3178           ppat->update_hw = cnl_private_pat_update_hw;
3179           ppat->match = bdw_private_pat_match;
3180           ppat->clear_value = GEN8_PPAT_WB | GEN8_PPAT_LLCELLC | GEN8_PPAT_AGE(3);
3181 
3182           /* XXX: spec is unclear if this is still needed for CNL+ */
3183           if (!USES_PPGTT(ppat->i915)) {
3184                     __alloc_ppat_entry(ppat, 0, GEN8_PPAT_UC);
3185                     return;
3186           }
3187 
3188           __alloc_ppat_entry(ppat, 0, GEN8_PPAT_WB | GEN8_PPAT_LLC);
3189           __alloc_ppat_entry(ppat, 1, GEN8_PPAT_WC | GEN8_PPAT_LLCELLC);
3190           __alloc_ppat_entry(ppat, 2, GEN8_PPAT_WT | GEN8_PPAT_LLCELLC);
3191           __alloc_ppat_entry(ppat, 3, GEN8_PPAT_UC);
3192           __alloc_ppat_entry(ppat, 4, GEN8_PPAT_WB | GEN8_PPAT_LLCELLC | GEN8_PPAT_AGE(0));
3193           __alloc_ppat_entry(ppat, 5, GEN8_PPAT_WB | GEN8_PPAT_LLCELLC | GEN8_PPAT_AGE(1));
3194           __alloc_ppat_entry(ppat, 6, GEN8_PPAT_WB | GEN8_PPAT_LLCELLC | GEN8_PPAT_AGE(2));
3195           __alloc_ppat_entry(ppat, 7, GEN8_PPAT_WB | GEN8_PPAT_LLCELLC | GEN8_PPAT_AGE(3));
3196 }
3197 
3198 /* The GGTT and PPGTT need a private PPAT setup in order to handle cacheability
3199  * bits. When using advanced contexts each context stores its own PAT, but
3200  * writing this data shouldn't be harmful even in those cases. */
bdw_setup_private_ppat(struct intel_ppat * ppat)3201 static void bdw_setup_private_ppat(struct intel_ppat *ppat)
3202 {
3203           ppat->max_entries = 8;
3204           ppat->update_hw = bdw_private_pat_update_hw;
3205           ppat->match = bdw_private_pat_match;
3206           ppat->clear_value = GEN8_PPAT_WB | GEN8_PPAT_LLCELLC | GEN8_PPAT_AGE(3);
3207 
3208           if (!USES_PPGTT(ppat->i915)) {
3209                     /* Spec: "For GGTT, there is NO pat_sel[2:0] from the entry,
3210                      * so RTL will always use the value corresponding to
3211                      * pat_sel = 000".
3212                      * So let's disable cache for GGTT to avoid screen corruptions.
3213                      * MOCS still can be used though.
3214                      * - System agent ggtt writes (i.e. cpu gtt mmaps) already work
3215                      * before this patch, i.e. the same uncached + snooping access
3216                      * like on gen6/7 seems to be in effect.
3217                      * - So this just fixes blitter/render access. Again it looks
3218                      * like it's not just uncached access, but uncached + snooping.
3219                      * So we can still hold onto all our assumptions wrt cpu
3220                      * clflushing on LLC machines.
3221                      */
3222                     __alloc_ppat_entry(ppat, 0, GEN8_PPAT_UC);
3223                     return;
3224           }
3225 
3226           __alloc_ppat_entry(ppat, 0, GEN8_PPAT_WB | GEN8_PPAT_LLC);      /* for normal objects, no eLLC */
3227           __alloc_ppat_entry(ppat, 1, GEN8_PPAT_WC | GEN8_PPAT_LLCELLC);  /* for something pointing to ptes? */
3228           __alloc_ppat_entry(ppat, 2, GEN8_PPAT_WT | GEN8_PPAT_LLCELLC);  /* for scanout with eLLC */
3229           __alloc_ppat_entry(ppat, 3, GEN8_PPAT_UC);                      /* Uncached objects, mostly for scanout */
3230           __alloc_ppat_entry(ppat, 4, GEN8_PPAT_WB | GEN8_PPAT_LLCELLC | GEN8_PPAT_AGE(0));
3231           __alloc_ppat_entry(ppat, 5, GEN8_PPAT_WB | GEN8_PPAT_LLCELLC | GEN8_PPAT_AGE(1));
3232           __alloc_ppat_entry(ppat, 6, GEN8_PPAT_WB | GEN8_PPAT_LLCELLC | GEN8_PPAT_AGE(2));
3233           __alloc_ppat_entry(ppat, 7, GEN8_PPAT_WB | GEN8_PPAT_LLCELLC | GEN8_PPAT_AGE(3));
3234 }
3235 
chv_setup_private_ppat(struct intel_ppat * ppat)3236 static void chv_setup_private_ppat(struct intel_ppat *ppat)
3237 {
3238           ppat->max_entries = 8;
3239           ppat->update_hw = bdw_private_pat_update_hw;
3240           ppat->match = chv_private_pat_match;
3241           ppat->clear_value = CHV_PPAT_SNOOP;
3242 
3243           /*
3244            * Map WB on BDW to snooped on CHV.
3245            *
3246            * Only the snoop bit has meaning for CHV, the rest is
3247            * ignored.
3248            *
3249            * The hardware will never snoop for certain types of accesses:
3250            * - CPU GTT (GMADR->GGTT->no snoop->memory)
3251            * - PPGTT page tables
3252            * - some other special cycles
3253            *
3254            * As with BDW, we also need to consider the following for GT accesses:
3255            * "For GGTT, there is NO pat_sel[2:0] from the entry,
3256            * so RTL will always use the value corresponding to
3257            * pat_sel = 000".
3258            * Which means we must set the snoop bit in PAT entry 0
3259            * in order to keep the global status page working.
3260            */
3261 
3262           __alloc_ppat_entry(ppat, 0, CHV_PPAT_SNOOP);
3263           __alloc_ppat_entry(ppat, 1, 0);
3264           __alloc_ppat_entry(ppat, 2, 0);
3265           __alloc_ppat_entry(ppat, 3, 0);
3266           __alloc_ppat_entry(ppat, 4, CHV_PPAT_SNOOP);
3267           __alloc_ppat_entry(ppat, 5, CHV_PPAT_SNOOP);
3268           __alloc_ppat_entry(ppat, 6, CHV_PPAT_SNOOP);
3269           __alloc_ppat_entry(ppat, 7, CHV_PPAT_SNOOP);
3270 }
3271 
gen6_gmch_remove(struct i915_address_space * vm)3272 static void gen6_gmch_remove(struct i915_address_space *vm)
3273 {
3274           struct i915_ggtt *ggtt = i915_vm_to_ggtt(vm);
3275 
3276           iounmap(ggtt->gsm);
3277           cleanup_scratch_page(vm);
3278 }
3279 
setup_private_pat(struct drm_i915_private * dev_priv)3280 static void setup_private_pat(struct drm_i915_private *dev_priv)
3281 {
3282           struct intel_ppat *ppat = &dev_priv->ppat;
3283           int i;
3284 
3285           ppat->i915 = dev_priv;
3286 
3287           if (INTEL_GEN(dev_priv) >= 10)
3288                     cnl_setup_private_ppat(ppat);
3289           else if (IS_CHERRYVIEW(dev_priv) || IS_GEN9_LP(dev_priv))
3290                     chv_setup_private_ppat(ppat);
3291           else
3292                     bdw_setup_private_ppat(ppat);
3293 
3294           GEM_BUG_ON(ppat->max_entries > INTEL_MAX_PPAT_ENTRIES);
3295 
3296           for_each_clear_bit(i, ppat->used, ppat->max_entries) {
3297                     ppat->entries[i].value = ppat->clear_value;
3298                     ppat->entries[i].ppat = ppat;
3299                     set_bit(i, ppat->dirty);
3300           }
3301 
3302           ppat->update_hw(dev_priv);
3303 }
3304 
gen8_gmch_probe(struct i915_ggtt * ggtt)3305 static int gen8_gmch_probe(struct i915_ggtt *ggtt)
3306 {
3307           struct drm_i915_private *dev_priv = ggtt->base.i915;
3308           struct pci_dev *pdev = dev_priv->drm.pdev;
3309           unsigned int size;
3310           u16 snb_gmch_ctl;
3311           int err;
3312 
3313           /* TODO: We're not aware of mappable constraints on gen8 yet */
3314           ggtt->mappable_base = pci_resource_start(pdev, 2);
3315           ggtt->mappable_end = pci_resource_len(pdev, 2);
3316 
3317           err = pci_set_dma_mask(pdev, DMA_BIT_MASK(39));
3318           if (!err)
3319                     err = pci_set_consistent_dma_mask(pdev, DMA_BIT_MASK(39));
3320           if (err)
3321                     DRM_ERROR("Can't set DMA mask/consistent mask (%d)\n", err);
3322 
3323           pci_read_config_word(pdev, SNB_GMCH_CTRL, &snb_gmch_ctl);
3324 
3325           if (INTEL_GEN(dev_priv) >= 9) {
3326                     ggtt->stolen_size = gen9_get_stolen_size(snb_gmch_ctl);
3327                     size = gen8_get_total_gtt_size(snb_gmch_ctl);
3328           } else if (IS_CHERRYVIEW(dev_priv)) {
3329                     ggtt->stolen_size = chv_get_stolen_size(snb_gmch_ctl);
3330                     size = chv_get_total_gtt_size(snb_gmch_ctl);
3331           } else {
3332                     ggtt->stolen_size = gen8_get_stolen_size(snb_gmch_ctl);
3333                     size = gen8_get_total_gtt_size(snb_gmch_ctl);
3334           }
3335 
3336           ggtt->base.total = (size / sizeof(gen8_pte_t)) << PAGE_SHIFT;
3337           ggtt->base.cleanup = gen6_gmch_remove;
3338           ggtt->base.bind_vma = ggtt_bind_vma;
3339           ggtt->base.unbind_vma = ggtt_unbind_vma;
3340           ggtt->base.set_pages = ggtt_set_pages;
3341           ggtt->base.clear_pages = clear_pages;
3342           ggtt->base.insert_page = gen8_ggtt_insert_page;
3343           ggtt->base.clear_range = nop_clear_range;
3344           if (!USES_FULL_PPGTT(dev_priv) || intel_scanout_needs_vtd_wa(dev_priv))
3345                     ggtt->base.clear_range = gen8_ggtt_clear_range;
3346 
3347           ggtt->base.insert_entries = gen8_ggtt_insert_entries;
3348 
3349           /* Serialize GTT updates with aperture access on BXT if VT-d is on. */
3350           if (intel_ggtt_update_needs_vtd_wa(dev_priv)) {
3351                     ggtt->base.insert_entries = bxt_vtd_ggtt_insert_entries__BKL;
3352                     ggtt->base.insert_page    = bxt_vtd_ggtt_insert_page__BKL;
3353                     if (ggtt->base.clear_range != nop_clear_range)
3354                               ggtt->base.clear_range = bxt_vtd_ggtt_clear_range__BKL;
3355           }
3356 
3357           /* Serialize GTT updates with aperture access on BXT if VT-d is on. */
3358           if (intel_ggtt_update_needs_vtd_wa(dev_priv)) {
3359                     ggtt->base.insert_entries = bxt_vtd_ggtt_insert_entries__BKL;
3360                     ggtt->base.insert_page    = bxt_vtd_ggtt_insert_page__BKL;
3361                     if (ggtt->base.clear_range != nop_clear_range)
3362                               ggtt->base.clear_range = bxt_vtd_ggtt_clear_range__BKL;
3363           }
3364 
3365           ggtt->invalidate = gen6_ggtt_invalidate;
3366 
3367           setup_private_pat(dev_priv);
3368 
3369           return ggtt_probe_common(ggtt, size);
3370 }
3371 
gen6_gmch_probe(struct i915_ggtt * ggtt)3372 static int gen6_gmch_probe(struct i915_ggtt *ggtt)
3373 {
3374           struct drm_i915_private *dev_priv = ggtt->base.i915;
3375           struct pci_dev *pdev = dev_priv->drm.pdev;
3376           unsigned int size;
3377           u16 snb_gmch_ctl;
3378           int err;
3379 
3380           ggtt->mappable_base = pci_resource_start(pdev, 2);
3381           ggtt->mappable_end = pci_resource_len(pdev, 2);
3382 
3383           /* 64/512MB is the current min/max we actually know of, but this is just
3384            * a coarse sanity check.
3385            */
3386           if (ggtt->mappable_end < (64<<20) || ggtt->mappable_end > (512<<20)) {
3387                     DRM_ERROR("Unknown GMADR size (%llx)\n", ggtt->mappable_end);
3388                     return -ENXIO;
3389           }
3390 
3391           err = pci_set_dma_mask(pdev, DMA_BIT_MASK(40));
3392           if (!err)
3393                     err = pci_set_consistent_dma_mask(pdev, DMA_BIT_MASK(40));
3394           if (err)
3395                     DRM_ERROR("Can't set DMA mask/consistent mask (%d)\n", err);
3396           pci_read_config_word(pdev, SNB_GMCH_CTRL, &snb_gmch_ctl);
3397 
3398           ggtt->stolen_size = gen6_get_stolen_size(snb_gmch_ctl);
3399 
3400           size = gen6_get_total_gtt_size(snb_gmch_ctl);
3401           ggtt->base.total = (size / sizeof(gen6_pte_t)) << PAGE_SHIFT;
3402 
3403           ggtt->base.clear_range = gen6_ggtt_clear_range;
3404           ggtt->base.insert_page = gen6_ggtt_insert_page;
3405           ggtt->base.insert_entries = gen6_ggtt_insert_entries;
3406           ggtt->base.bind_vma = ggtt_bind_vma;
3407           ggtt->base.unbind_vma = ggtt_unbind_vma;
3408           ggtt->base.set_pages = ggtt_set_pages;
3409           ggtt->base.clear_pages = clear_pages;
3410           ggtt->base.cleanup = gen6_gmch_remove;
3411 
3412           ggtt->invalidate = gen6_ggtt_invalidate;
3413 
3414           if (HAS_EDRAM(dev_priv))
3415                     ggtt->base.pte_encode = iris_pte_encode;
3416           else if (IS_HASWELL(dev_priv))
3417                     ggtt->base.pte_encode = hsw_pte_encode;
3418           else if (IS_VALLEYVIEW(dev_priv))
3419                     ggtt->base.pte_encode = byt_pte_encode;
3420           else if (INTEL_GEN(dev_priv) >= 7)
3421                     ggtt->base.pte_encode = ivb_pte_encode;
3422           else
3423                     ggtt->base.pte_encode = snb_pte_encode;
3424 
3425           return ggtt_probe_common(ggtt, size);
3426 }
3427 
/*
 * Address-space cleanup hook installed by i915_gmch_probe(). It simply
 * forwards to intel_gmch_remove(); @vm is not used.
 */
static void i915_gmch_remove(struct i915_address_space *vm)
{
	intel_gmch_remove();
}
3432 
i915_gmch_probe(struct i915_ggtt * ggtt)3433 static int i915_gmch_probe(struct i915_ggtt *ggtt)
3434 {
3435           struct drm_i915_private *dev_priv = ggtt->base.i915;
3436 #if 0
3437           int ret;
3438 
3439           ret = intel_gmch_probe(dev_priv->bridge_dev, dev_priv->drm.pdev, NULL);
3440           if (!ret) {
3441                     DRM_ERROR("failed to set up gmch\n");
3442                     return -EIO;
3443           }
3444 #endif
3445 
3446           intel_gtt_get(&ggtt->base.total,
3447                           &ggtt->stolen_size,
3448                           &ggtt->mappable_base,
3449                           &ggtt->mappable_end);
3450 
3451           ggtt->do_idle_maps = needs_idle_maps(dev_priv);
3452           ggtt->base.insert_page = i915_ggtt_insert_page;
3453           ggtt->base.insert_entries = i915_ggtt_insert_entries;
3454           ggtt->base.clear_range = i915_ggtt_clear_range;
3455           ggtt->base.bind_vma = ggtt_bind_vma;
3456           ggtt->base.unbind_vma = ggtt_unbind_vma;
3457           ggtt->base.set_pages = ggtt_set_pages;
3458           ggtt->base.clear_pages = clear_pages;
3459           ggtt->base.cleanup = i915_gmch_remove;
3460 
3461           ggtt->invalidate = gmch_ggtt_invalidate;
3462 
3463           if (unlikely(ggtt->do_idle_maps))
3464                     DRM_INFO("applying Ironlake quirks for intel_iommu\n");
3465 
3466           return 0;
3467 }
3468 
3469 /**
3470  * i915_ggtt_probe_hw - Probe GGTT hardware location
3471  * @dev_priv: i915 device
3472  */
i915_ggtt_probe_hw(struct drm_i915_private * dev_priv)3473 int i915_ggtt_probe_hw(struct drm_i915_private *dev_priv)
3474 {
3475           struct i915_ggtt *ggtt = &dev_priv->ggtt;
3476           int ret;
3477 
3478           ggtt->base.i915 = dev_priv;
3479           ggtt->base.dma = &dev_priv->drm.pdev->dev;
3480 
3481           if (INTEL_GEN(dev_priv) <= 5)
3482                     ret = i915_gmch_probe(ggtt);
3483           else if (INTEL_GEN(dev_priv) < 8)
3484                     ret = gen6_gmch_probe(ggtt);
3485           else
3486                     ret = gen8_gmch_probe(ggtt);
3487           if (ret)
3488                     return ret;
3489 
3490           /* Trim the GGTT to fit the GuC mappable upper range (when enabled).
3491            * This is easier than doing range restriction on the fly, as we
3492            * currently don't have any bits spare to pass in this upper
3493            * restriction!
3494            */
3495           if (HAS_GUC(dev_priv) && i915_modparams.enable_guc_loading) {
3496                     ggtt->base.total = min_t(u64, ggtt->base.total, GUC_GGTT_TOP);
3497                     ggtt->mappable_end = min(ggtt->mappable_end, ggtt->base.total);
3498           }
3499 
3500           if ((ggtt->base.total - 1) >> 32) {
3501                     DRM_ERROR("We never expected a Global GTT with more than 32bits"
3502                                 " of address space! Found %lldM!\n",
3503                                 ggtt->base.total >> 20);
3504                     ggtt->base.total = 1ULL << 32;
3505                     ggtt->mappable_end = min(ggtt->mappable_end, ggtt->base.total);
3506           }
3507 
3508           if (ggtt->mappable_end > ggtt->base.total) {
3509                     DRM_ERROR("mappable aperture extends past end of GGTT,"
3510                                 " aperture=%llx, total=%llx\n",
3511                                 ggtt->mappable_end, ggtt->base.total);
3512                     ggtt->mappable_end = ggtt->base.total;
3513           }
3514 
3515           /* GMADR is the PCI mmio aperture into the global GTT. */
3516           DRM_INFO("Memory usable by graphics device = %lluM\n",
3517                      ggtt->base.total >> 20);
3518           DRM_DEBUG_DRIVER("GMADR size = %lldM\n", ggtt->mappable_end >> 20);
3519           DRM_DEBUG_DRIVER("GTT stolen size = %uM\n", ggtt->stolen_size >> 20);
3520           if (intel_vtd_active())
3521                     DRM_INFO("VT-d active for gfx access\n");
3522 
3523           return 0;
3524 }
3525 
3526 /**
3527  * i915_ggtt_init_hw - Initialize GGTT hardware
3528  * @dev_priv: i915 device
3529  */
i915_ggtt_init_hw(struct drm_i915_private * dev_priv)3530 int i915_ggtt_init_hw(struct drm_i915_private *dev_priv)
3531 {
3532           struct i915_ggtt *ggtt = &dev_priv->ggtt;
3533           int ret;
3534 
3535           INIT_LIST_HEAD(&dev_priv->vm_list);
3536 
3537           /* Note that we use page colouring to enforce a guard page at the
3538            * end of the address space. This is required as the CS may prefetch
3539            * beyond the end of the batch buffer, across the page boundary,
3540            * and beyond the end of the GTT if we do not provide a guard.
3541            */
3542           mutex_lock(&dev_priv->drm.struct_mutex);
3543           i915_address_space_init(&ggtt->base, dev_priv, "[global]");
3544           if (!HAS_LLC(dev_priv) && !USES_PPGTT(dev_priv))
3545                     ggtt->base.mm.color_adjust = i915_gtt_color_adjust;
3546           mutex_unlock(&dev_priv->drm.struct_mutex);
3547 
3548           if (!io_mapping_init_wc(&dev_priv->ggtt.mappable,
3549                                         dev_priv->ggtt.mappable_base,
3550                                         dev_priv->ggtt.mappable_end)) {
3551                     ret = -EIO;
3552                     goto out_gtt_cleanup;
3553           }
3554 
3555           ggtt->mtrr = arch_phys_wc_add(ggtt->mappable_base, ggtt->mappable_end);
3556 
3557           /*
3558            * Initialise stolen early so that we may reserve preallocated
3559            * objects for the BIOS to KMS transition.
3560            */
3561           ret = i915_gem_init_stolen(dev_priv);
3562           if (ret)
3563                     goto out_gtt_cleanup;
3564 
3565           return 0;
3566 
3567 out_gtt_cleanup:
3568           ggtt->base.cleanup(&ggtt->base);
3569           return ret;
3570 }
3571 
i915_ggtt_enable_hw(struct drm_i915_private * dev_priv)3572 int i915_ggtt_enable_hw(struct drm_i915_private *dev_priv)
3573 {
3574           if (INTEL_GEN(dev_priv) < 6 && !intel_enable_gtt())
3575                     return -EIO;
3576 
3577           return 0;
3578 }
3579 
i915_ggtt_enable_guc(struct drm_i915_private * i915)3580 void i915_ggtt_enable_guc(struct drm_i915_private *i915)
3581 {
3582           GEM_BUG_ON(i915->ggtt.invalidate != gen6_ggtt_invalidate);
3583 
3584           i915->ggtt.invalidate = guc_ggtt_invalidate;
3585 }
3586 
i915_ggtt_disable_guc(struct drm_i915_private * i915)3587 void i915_ggtt_disable_guc(struct drm_i915_private *i915)
3588 {
3589           /* We should only be called after i915_ggtt_enable_guc() */
3590           GEM_BUG_ON(i915->ggtt.invalidate != guc_ggtt_invalidate);
3591 
3592           i915->ggtt.invalidate = gen6_ggtt_invalidate;
3593 }
3594 
i915_gem_restore_gtt_mappings(struct drm_i915_private * dev_priv)3595 void i915_gem_restore_gtt_mappings(struct drm_i915_private *dev_priv)
3596 {
3597           struct i915_ggtt *ggtt = &dev_priv->ggtt;
3598           struct drm_i915_gem_object *obj, *on;
3599 
3600           i915_check_and_clear_faults(dev_priv);
3601 
3602           /* First fill our portion of the GTT with scratch pages */
3603           ggtt->base.clear_range(&ggtt->base, 0, ggtt->base.total);
3604 
3605           ggtt->base.closed = true; /* skip rewriting PTE on VMA unbind */
3606 
3607           /* clflush objects bound into the GGTT and rebind them. */
3608           list_for_each_entry_safe(obj, on, &dev_priv->mm.bound_list, mm.link) {
3609                     bool ggtt_bound = false;
3610                     struct i915_vma *vma;
3611 
3612                     list_for_each_entry(vma, &obj->vma_list, obj_link) {
3613                               if (vma->vm != &ggtt->base)
3614                                         continue;
3615 
3616                               if (!i915_vma_unbind(vma))
3617                                         continue;
3618 
3619                               WARN_ON(i915_vma_bind(vma, obj->cache_level,
3620                                                         PIN_UPDATE));
3621                               ggtt_bound = true;
3622                     }
3623 
3624                     if (ggtt_bound)
3625                               WARN_ON(i915_gem_object_set_to_gtt_domain(obj, false));
3626           }
3627 
3628           ggtt->base.closed = false;
3629 
3630           if (INTEL_GEN(dev_priv) >= 8) {
3631                     struct intel_ppat *ppat = &dev_priv->ppat;
3632 
3633                     bitmap_set(ppat->dirty, 0, ppat->max_entries);
3634                     dev_priv->ppat.update_hw(dev_priv);
3635                     return;
3636           }
3637 
3638           if (USES_PPGTT(dev_priv)) {
3639                     struct i915_address_space *vm;
3640 
3641                     list_for_each_entry(vm, &dev_priv->vm_list, global_link) {
3642                               struct i915_hw_ppgtt *ppgtt;
3643 
3644                               if (i915_is_ggtt(vm))
3645                                         ppgtt = dev_priv->mm.aliasing_ppgtt;
3646                               else
3647                                         ppgtt = i915_vm_to_ppgtt(vm);
3648 
3649                               gen6_write_page_range(ppgtt, 0, ppgtt->base.total);
3650                     }
3651           }
3652 
3653           i915_ggtt_invalidate(dev_priv);
3654 }
3655 
3656 static struct scatterlist *
rotate_pages(const dma_addr_t * in,unsigned int offset,unsigned int width,unsigned int height,unsigned int stride,struct sg_table * st,struct scatterlist * sg)3657 rotate_pages(const dma_addr_t *in, unsigned int offset,
3658                unsigned int width, unsigned int height,
3659                unsigned int stride,
3660                struct sg_table *st, struct scatterlist *sg)
3661 {
3662           unsigned int column, row;
3663           unsigned int src_idx;
3664 
3665           for (column = 0; column < width; column++) {
3666                     src_idx = stride * (height - 1) + column;
3667                     for (row = 0; row < height; row++) {
3668                               st->nents++;
3669                               /* We don't need the pages, but need to initialize
3670                                * the entries so the sg list can be happily traversed.
3671                                * The only thing we need are DMA addresses.
3672                                */
3673                               sg_set_page(sg, NULL, PAGE_SIZE, 0);
3674                               sg_dma_address(sg) = in[offset + src_idx];
3675                               sg_dma_len(sg) = PAGE_SIZE;
3676                               sg = sg_next(sg);
3677                               src_idx -= stride;
3678                     }
3679           }
3680 
3681           return sg;
3682 }
3683 
3684 static noinline struct sg_table *
intel_rotate_pages(struct intel_rotation_info * rot_info,struct drm_i915_gem_object * obj)3685 intel_rotate_pages(struct intel_rotation_info *rot_info,
3686                        struct drm_i915_gem_object *obj)
3687 {
3688           const unsigned long n_pages = obj->base.size / PAGE_SIZE;
3689           unsigned int size = intel_rotation_info_size(rot_info);
3690           struct sgt_iter sgt_iter;
3691           dma_addr_t dma_addr;
3692           unsigned long i;
3693           dma_addr_t *page_addr_list;
3694           struct sg_table *st;
3695           struct scatterlist *sg;
3696           int ret = -ENOMEM;
3697 
3698           /* Allocate a temporary list of source pages for random access. */
3699           page_addr_list = kvmalloc_array(n_pages,
3700                                                   sizeof(dma_addr_t),
3701                                                   GFP_KERNEL);
3702           if (!page_addr_list)
3703                     return ERR_PTR(ret);
3704 
3705           /* Allocate target SG list. */
3706           st = kmalloc(sizeof(*st), M_DRM, GFP_KERNEL);
3707           if (!st)
3708                     goto err_st_alloc;
3709 
3710           ret = sg_alloc_table(st, size, GFP_KERNEL);
3711           if (ret)
3712                     goto err_sg_alloc;
3713 
3714           /* Populate source page list from the object. */
3715           i = 0;
3716           for_each_sgt_dma(dma_addr, sgt_iter, obj->mm.pages)
3717                     page_addr_list[i++] = dma_addr;
3718 
3719           GEM_BUG_ON(i != n_pages);
3720           st->nents = 0;
3721           sg = st->sgl;
3722 
3723           for (i = 0 ; i < ARRAY_SIZE(rot_info->plane); i++) {
3724                     sg = rotate_pages(page_addr_list, rot_info->plane[i].offset,
3725                                           rot_info->plane[i].width, rot_info->plane[i].height,
3726                                           rot_info->plane[i].stride, st, sg);
3727           }
3728 
3729           DRM_DEBUG_KMS("Created rotated page mapping for object size %zu (%ux%u tiles, %u pages)\n",
3730                           obj->base.size, rot_info->plane[0].width, rot_info->plane[0].height, size);
3731 
3732           kvfree(page_addr_list);
3733 
3734           return st;
3735 
3736 err_sg_alloc:
3737           kfree(st);
3738 err_st_alloc:
3739           kvfree(page_addr_list);
3740 
3741           DRM_DEBUG_KMS("Failed to create rotated mapping for object size %zu! (%ux%u tiles, %u pages)\n",
3742                           obj->base.size, rot_info->plane[0].width, rot_info->plane[0].height, size);
3743 
3744           return ERR_PTR(ret);
3745 }
3746 
3747 static noinline struct sg_table *
intel_partial_pages(const struct i915_ggtt_view * view,struct drm_i915_gem_object * obj)3748 intel_partial_pages(const struct i915_ggtt_view *view,
3749                         struct drm_i915_gem_object *obj)
3750 {
3751           struct sg_table *st;
3752           struct scatterlist *sg, *iter;
3753           unsigned int count = view->partial.size;
3754           unsigned int offset;
3755           int ret = -ENOMEM;
3756 
3757           st = kmalloc(sizeof(*st), M_DRM, GFP_KERNEL);
3758           if (!st)
3759                     goto err_st_alloc;
3760 
3761           ret = sg_alloc_table(st, count, GFP_KERNEL);
3762           if (ret)
3763                     goto err_sg_alloc;
3764 
3765           iter = i915_gem_object_get_sg(obj, view->partial.offset, &offset);
3766           GEM_BUG_ON(!iter);
3767 
3768           sg = st->sgl;
3769           st->nents = 0;
3770           do {
3771                     unsigned int len;
3772 
3773                     len = min(iter->length - (offset << PAGE_SHIFT),
3774                                 count << PAGE_SHIFT);
3775                     sg_set_page(sg, NULL, len, 0);
3776                     sg_dma_address(sg) =
3777                               sg_dma_address(iter) + (offset << PAGE_SHIFT);
3778                     sg_dma_len(sg) = len;
3779 
3780                     st->nents++;
3781                     count -= len >> PAGE_SHIFT;
3782                     if (count == 0) {
3783                               sg_mark_end(sg);
3784                               return st;
3785                     }
3786 
3787                     sg = __sg_next(sg);
3788                     iter = __sg_next(iter);
3789                     offset = 0;
3790           } while (1);
3791 
3792 err_sg_alloc:
3793           kfree(st);
3794 err_st_alloc:
3795           return ERR_PTR(ret);
3796 }
3797 
3798 static int
i915_get_ggtt_vma_pages(struct i915_vma * vma)3799 i915_get_ggtt_vma_pages(struct i915_vma *vma)
3800 {
3801           int ret;
3802 
3803           /* The vma->pages are only valid within the lifespan of the borrowed
3804            * obj->mm.pages. When the obj->mm.pages sg_table is regenerated, so
3805            * must be the vma->pages. A simple rule is that vma->pages must only
3806            * be accessed when the obj->mm.pages are pinned.
3807            */
3808           GEM_BUG_ON(!i915_gem_object_has_pinned_pages(vma->obj));
3809 
3810           switch (vma->ggtt_view.type) {
3811           case I915_GGTT_VIEW_NORMAL:
3812                     vma->pages = vma->obj->mm.pages;
3813                     return 0;
3814 
3815           case I915_GGTT_VIEW_ROTATED:
3816                     vma->pages =
3817                               intel_rotate_pages(&vma->ggtt_view.rotated, vma->obj);
3818                     break;
3819 
3820           case I915_GGTT_VIEW_PARTIAL:
3821                     vma->pages = intel_partial_pages(&vma->ggtt_view, vma->obj);
3822                     break;
3823 
3824           default:
3825                     WARN_ONCE(1, "GGTT view %u not implemented!\n",
3826                                 vma->ggtt_view.type);
3827                     return -EINVAL;
3828           }
3829 
3830           ret = 0;
3831           if (unlikely(IS_ERR(vma->pages))) {
3832                     ret = PTR_ERR(vma->pages);
3833                     vma->pages = NULL;
3834                     DRM_ERROR("Failed to get pages for VMA view type %u (%d)!\n",
3835                                 vma->ggtt_view.type, ret);
3836           }
3837           return ret;
3838 }
3839 
3840 /**
3841  * i915_gem_gtt_reserve - reserve a node in an address_space (GTT)
3842  * @vm: the &struct i915_address_space
3843  * @node: the &struct drm_mm_node (typically i915_vma.mode)
3844  * @size: how much space to allocate inside the GTT,
3845  *        must be #I915_GTT_PAGE_SIZE aligned
3846  * @offset: where to insert inside the GTT,
3847  *          must be #I915_GTT_MIN_ALIGNMENT aligned, and the node
3848  *          (@offset + @size) must fit within the address space
3849  * @color: color to apply to node, if this node is not from a VMA,
3850  *         color must be #I915_COLOR_UNEVICTABLE
3851  * @flags: control search and eviction behaviour
3852  *
3853  * i915_gem_gtt_reserve() tries to insert the @node at the exact @offset inside
3854  * the address space (using @size and @color). If the @node does not fit, it
3855  * tries to evict any overlapping nodes from the GTT, including any
3856  * neighbouring nodes if the colors do not match (to ensure guard pages between
3857  * differing domains). See i915_gem_evict_for_node() for the gory details
3858  * on the eviction algorithm. #PIN_NONBLOCK may used to prevent waiting on
3859  * evicting active overlapping objects, and any overlapping node that is pinned
3860  * or marked as unevictable will also result in failure.
3861  *
3862  * Returns: 0 on success, -ENOSPC if no suitable hole is found, -EINTR if
3863  * asked to wait for eviction and interrupted.
3864  */
i915_gem_gtt_reserve(struct i915_address_space * vm,struct drm_mm_node * node,u64 size,u64 offset,unsigned long color,unsigned int flags)3865 int i915_gem_gtt_reserve(struct i915_address_space *vm,
3866                                struct drm_mm_node *node,
3867                                u64 size, u64 offset, unsigned long color,
3868                                unsigned int flags)
3869 {
3870           int err;
3871 
3872           GEM_BUG_ON(!size);
3873           GEM_BUG_ON(!IS_ALIGNED(size, I915_GTT_PAGE_SIZE));
3874           GEM_BUG_ON(!IS_ALIGNED(offset, I915_GTT_MIN_ALIGNMENT));
3875           GEM_BUG_ON(range_overflows(offset, size, vm->total));
3876           GEM_BUG_ON(vm == &vm->i915->mm.aliasing_ppgtt->base);
3877           GEM_BUG_ON(drm_mm_node_allocated(node));
3878 
3879           node->size = size;
3880           node->start = offset;
3881           node->color = color;
3882 
3883           err = drm_mm_reserve_node(&vm->mm, node);
3884           if (err != -ENOSPC)
3885                     return err;
3886 
3887           if (flags & PIN_NOEVICT)
3888                     return -ENOSPC;
3889 
3890           err = i915_gem_evict_for_node(vm, node, flags);
3891           if (err == 0)
3892                     err = drm_mm_reserve_node(&vm->mm, node);
3893 
3894           return err;
3895 }
3896 
/*
 * Pick a random @align-aligned offset such that [offset, offset + @len)
 * lies inside [@start, @end).
 *
 * The candidate window runs from @start rounded up to @align, to
 * (@end - @len) rounded down to @align. A random value is reduced modulo
 * the width of that window, added to @start and the sum rounded up to
 * @align. If the window has zero width, the rounded-up @start is the only
 * candidate and is returned without drawing a random number.
 */
static u64 random_offset(u64 start, u64 end, u64 len, u64 align)
{
	u64 lowest, highest, span, pick, rem;

	GEM_BUG_ON(range_overflows(start, len, end));

	lowest = round_up(start, align);
	highest = round_down(end - len, align);
	GEM_BUG_ON(lowest > highest);

	span = highest - lowest;
	if (!span)
		return lowest;

	if (sizeof(unsigned long) == sizeof(u64)) {
		/* A long already provides 64 random bits. */
		pick = get_random_long();
	} else {
		/* Otherwise draw 32 bits, and a second 32 bits only when
		 * the window is too wide for a single draw to cover.
		 */
		pick = get_random_int();
		if (span > U32_MAX) {
			pick <<= 32;
			pick |= get_random_int();
		}
	}

	/* Fold the random value into the window. */
	div64_u64_rem(pick, span, &rem);
	start += rem;

	return round_up(start, align);
}
3921 
3922 /**
3923  * i915_gem_gtt_insert - insert a node into an address_space (GTT)
3924  * @vm: the &struct i915_address_space
3925  * @node: the &struct drm_mm_node (typically i915_vma.node)
3926  * @size: how much space to allocate inside the GTT,
3927  *        must be #I915_GTT_PAGE_SIZE aligned
3928  * @alignment: required alignment of starting offset, may be 0 but
3929  *             if specified, this must be a power-of-two and at least
3930  *             #I915_GTT_MIN_ALIGNMENT
3931  * @color: color to apply to node
3932  * @start: start of any range restriction inside GTT (0 for all),
3933  *         must be #I915_GTT_PAGE_SIZE aligned
3934  * @end: end of any range restriction inside GTT (U64_MAX for all),
3935  *       must be #I915_GTT_PAGE_SIZE aligned if not U64_MAX
3936  * @flags: control search and eviction behaviour
3937  *
3938  * i915_gem_gtt_insert() first searches for an available hole into which
3939  * is can insert the node. The hole address is aligned to @alignment and
3940  * its @size must then fit entirely within the [@start, @end] bounds. The
3941  * nodes on either side of the hole must match @color, or else a guard page
3942  * will be inserted between the two nodes (or the node evicted). If no
3943  * suitable hole is found, first a victim is randomly selected and tested
3944  * for eviction, otherwise then the LRU list of objects within the GTT
3945  * is scanned to find the first set of replacement nodes to create the hole.
3946  * Those old overlapping nodes are evicted from the GTT (and so must be
3947  * rebound before any future use). Any node that is currently pinned cannot
3948  * be evicted (see i915_vma_pin()). Similar if the node's VMA is currently
3949  * active and #PIN_NONBLOCK is specified, that node is also skipped when
3950  * searching for an eviction candidate. See i915_gem_evict_something() for
3951  * the gory details on the eviction algorithm.
3952  *
3953  * Returns: 0 on success, -ENOSPC if no suitable hole is found, -EINTR if
3954  * asked to wait for eviction and interrupted.
3955  */
i915_gem_gtt_insert(struct i915_address_space * vm,struct drm_mm_node * node,u64 size,u64 alignment,unsigned long color,u64 start,u64 end,unsigned int flags)3956 int i915_gem_gtt_insert(struct i915_address_space *vm,
3957                               struct drm_mm_node *node,
3958                               u64 size, u64 alignment, unsigned long color,
3959                               u64 start, u64 end, unsigned int flags)
3960 {
3961           enum drm_mm_insert_mode mode;
3962           u64 offset;
3963           int err;
3964 
3965           lockdep_assert_held(&vm->i915->drm.struct_mutex);
3966           GEM_BUG_ON(!size);
3967           GEM_BUG_ON(!IS_ALIGNED(size, I915_GTT_PAGE_SIZE));
3968           GEM_BUG_ON(alignment && !is_power_of_2(alignment));
3969           GEM_BUG_ON(alignment && !IS_ALIGNED(alignment, I915_GTT_MIN_ALIGNMENT));
3970           GEM_BUG_ON(start >= end);
3971           GEM_BUG_ON(start > 0  && !IS_ALIGNED(start, I915_GTT_PAGE_SIZE));
3972           GEM_BUG_ON(end < U64_MAX && !IS_ALIGNED(end, I915_GTT_PAGE_SIZE));
3973           GEM_BUG_ON(vm == &vm->i915->mm.aliasing_ppgtt->base);
3974           GEM_BUG_ON(drm_mm_node_allocated(node));
3975 
3976           if (unlikely(range_overflows(start, size, end)))
3977                     return -ENOSPC;
3978 
3979           if (unlikely(round_up(start, alignment) > round_down(end - size, alignment)))
3980                     return -ENOSPC;
3981 
3982           mode = DRM_MM_INSERT_BEST;
3983           if (flags & PIN_HIGH)
3984                     mode = DRM_MM_INSERT_HIGH;
3985           if (flags & PIN_MAPPABLE)
3986                     mode = DRM_MM_INSERT_LOW;
3987 
3988           /* We only allocate in PAGE_SIZE/GTT_PAGE_SIZE (4096) chunks,
3989            * so we know that we always have a minimum alignment of 4096.
3990            * The drm_mm range manager is optimised to return results
3991            * with zero alignment, so where possible use the optimal
3992            * path.
3993            */
3994           BUILD_BUG_ON(I915_GTT_MIN_ALIGNMENT > I915_GTT_PAGE_SIZE);
3995           if (alignment <= I915_GTT_MIN_ALIGNMENT)
3996                     alignment = 0;
3997 
3998           err = drm_mm_insert_node_in_range(&vm->mm, node,
3999                                                     size, alignment, color,
4000                                                     start, end, mode);
4001           if (err != -ENOSPC)
4002                     return err;
4003 
4004           if (flags & PIN_NOEVICT)
4005                     return -ENOSPC;
4006 
4007           /* No free space, pick a slot at random.
4008            *
4009            * There is a pathological case here using a GTT shared between
4010            * mmap and GPU (i.e. ggtt/aliasing_ppgtt but not full-ppgtt):
4011            *
4012            *    |<-- 256 MiB aperture -->||<-- 1792 MiB unmappable -->|
4013            *         (64k objects)             (448k objects)
4014            *
4015            * Now imagine that the eviction LRU is ordered top-down (just because
4016            * pathology meets real life), and that we need to evict an object to
4017            * make room inside the aperture. The eviction scan then has to walk
4018            * the 448k list before it finds one within range. And now imagine that
4019            * it has to search for a new hole between every byte inside the memcpy,
4020            * for several simultaneous clients.
4021            *
4022            * On a full-ppgtt system, if we have run out of available space, there
4023            * will be lots and lots of objects in the eviction list! Again,
4024            * searching that LRU list may be slow if we are also applying any
4025            * range restrictions (e.g. restriction to low 4GiB) and so, for
4026            * simplicity and similarilty between different GTT, try the single
4027            * random replacement first.
4028            */
4029           offset = random_offset(start, end,
4030                                      size, alignment ?: I915_GTT_MIN_ALIGNMENT);
4031           err = i915_gem_gtt_reserve(vm, node, size, offset, color, flags);
4032           if (err != -ENOSPC)
4033                     return err;
4034 
4035           /* Randomly selected placement is pinned, do a search */
4036           err = i915_gem_evict_something(vm, size, alignment, color,
4037                                                start, end, flags);
4038           if (err)
4039                     return err;
4040 
4041           return drm_mm_insert_node_in_range(&vm->mm, node,
4042                                                      size, alignment, color,
4043                                                      start, end, DRM_MM_INSERT_EVICT);
4044 }
4045 
4046 #if IS_ENABLED(CONFIG_DRM_I915_SELFTEST)
4047 #include "selftests/mock_gtt.c"
4048 #include "selftests/i915_gem_gtt.c"
4049 #endif
4050