xref: /NextBSD/sys/dev/drm2/i915/i915_gem.c (revision 4557fabb34e865d7f40be64b39c9e34fa41dbb60)
1 /*
2  * Copyright © 2008 Intel Corporation
3  *
4  * Permission is hereby granted, free of charge, to any person obtaining a
5  * copy of this software and associated documentation files (the "Software"),
6  * to deal in the Software without restriction, including without limitation
7  * the rights to use, copy, modify, merge, publish, distribute, sublicense,
8  * and/or sell copies of the Software, and to permit persons to whom the
9  * Software is furnished to do so, subject to the following conditions:
10  *
11  * The above copyright notice and this permission notice (including the next
12  * paragraph) shall be included in all copies or substantial portions of the
13  * Software.
14  *
15  * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
16  * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
17  * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT.  IN NO EVENT SHALL
18  * THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
19  * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING
20  * FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS
21  * IN THE SOFTWARE.
22  *
23  * Authors:
24  *    Eric Anholt <eric@anholt.net>
25  *
26  * Copyright (c) 2011 The FreeBSD Foundation
27  * All rights reserved.
28  *
29  * This software was developed by Konstantin Belousov under sponsorship from
30  * the FreeBSD Foundation.
31  *
32  * Redistribution and use in source and binary forms, with or without
33  * modification, are permitted provided that the following conditions
34  * are met:
35  * 1. Redistributions of source code must retain the above copyright
36  *    notice, this list of conditions and the following disclaimer.
37  * 2. Redistributions in binary form must reproduce the above copyright
38  *    notice, this list of conditions and the following disclaimer in the
39  *    documentation and/or other materials provided with the distribution.
40  *
41  * THIS SOFTWARE IS PROVIDED BY THE AUTHOR AND CONTRIBUTORS ``AS IS'' AND
42  * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
43  * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
44  * ARE DISCLAIMED. IN NO EVENT SHALL THE AUTHOR OR CONTRIBUTORS BE LIABLE
45  * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
46  * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS
47  * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
48  * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
49  * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY
50  * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
51  * SUCH DAMAGE.
52  */
53 
54 #include <sys/cdefs.h>
55 __FBSDID("$FreeBSD$");
56 
57 #include <dev/drm2/drmP.h>
58 #include <dev/drm2/drm.h>
59 #include <dev/drm2/i915/i915_drm.h>
60 #include <dev/drm2/i915/i915_drv.h>
61 #include <dev/drm2/i915/intel_drv.h>
62 #include <dev/drm2/i915/intel_ringbuffer.h>
63 
64 #include <sys/resourcevar.h>
65 #include <sys/sched.h>
66 #include <sys/sf_buf.h>
67 
68 #include <vm/vm.h>
69 #include <vm/vm_pageout.h>
70 
71 #include <machine/md_var.h>
72 
/*
 * Linux compatibility shims.  The annotation macros expand to nothing;
 * the helper macros map Linux idioms onto local equivalents.
 */
#define	__user
#define	__force
#define	__iomem
#define	__must_check

/* Convert an integer value (e.g. an ioctl data_ptr field) to a pointer. */
#define	to_user_ptr(x)		((void *)(uintptr_t)(x))
/* Byte offset of x within its page (PAGE_MASK is the low-bits mask here). */
#define	offset_in_page(x)	((x) & PAGE_MASK)
/* Physical address of a page, via VM_PAGE_TO_PHYS(). */
#define	page_to_phys(x)		VM_PAGE_TO_PHYS(x)
80 
81 static void i915_gem_object_flush_gtt_write_domain(struct drm_i915_gem_object *obj);
82 static void i915_gem_object_flush_cpu_write_domain(struct drm_i915_gem_object *obj);
83 static __must_check int i915_gem_object_bind_to_gtt(struct drm_i915_gem_object *obj,
84 						    unsigned alignment,
85 						    bool map_and_fenceable);
86 static int i915_gem_phys_pwrite(struct drm_device *dev,
87 				struct drm_i915_gem_object *obj,
88 				struct drm_i915_gem_pwrite *args,
89 				struct drm_file *file);
90 
91 static void i915_gem_write_fence(struct drm_device *dev, int reg,
92 				 struct drm_i915_gem_object *obj);
93 static void i915_gem_object_update_fence(struct drm_i915_gem_object *obj,
94 					 struct drm_i915_fence_reg *fence,
95 					 bool enable);
96 
97 static void i915_gem_lowmem(void *arg);
98 static void i915_gem_object_truncate(struct drm_i915_gem_object *obj);
99 
100 static int i915_gem_object_get_pages_range(struct drm_i915_gem_object *obj,
101     off_t start, off_t end);
102 static void i915_gem_object_put_pages_range(struct drm_i915_gem_object *obj,
103     off_t start, off_t end);
104 
105 static vm_page_t i915_gem_wire_page(vm_object_t object, vm_pindex_t pindex,
106     bool *fresh);
107 
108 MALLOC_DEFINE(DRM_I915_GEM, "i915gem", "Allocations from i915 gem");
109 long i915_gem_wired_pages_cnt;
110 
cpu_cache_is_coherent(struct drm_device * dev,enum i915_cache_level level)111 static bool cpu_cache_is_coherent(struct drm_device *dev,
112 				  enum i915_cache_level level)
113 {
114 	return HAS_LLC(dev) || level != I915_CACHE_NONE;
115 }
116 
cpu_write_needs_clflush(struct drm_i915_gem_object * obj)117 static bool cpu_write_needs_clflush(struct drm_i915_gem_object *obj)
118 {
119 	if (!cpu_cache_is_coherent(obj->base.dev, obj->cache_level))
120 		return true;
121 
122 	return obj->pin_display;
123 }
124 
i915_gem_object_fence_lost(struct drm_i915_gem_object * obj)125 static inline void i915_gem_object_fence_lost(struct drm_i915_gem_object *obj)
126 {
127 	if (obj->tiling_mode)
128 		i915_gem_release_mmap(obj);
129 
130 	/* As we do not have an associated fence register, we will force
131 	 * a tiling change if we ever need to acquire one.
132 	 */
133 	obj->fence_dirty = false;
134 	obj->fence_reg = I915_FENCE_REG_NONE;
135 }
136 
137 /* some bookkeeping */
i915_gem_info_add_obj(struct drm_i915_private * dev_priv,size_t size)138 static void i915_gem_info_add_obj(struct drm_i915_private *dev_priv,
139 				  size_t size)
140 {
141 	dev_priv->mm.object_count++;
142 	dev_priv->mm.object_memory += size;
143 }
144 
/*
 * Object accounting: remove one object of `size` bytes from the
 * per-device memory-manager statistics.
 */
static void
i915_gem_info_remove_obj(struct drm_i915_private *dev_priv, size_t size)
{
	dev_priv->mm.object_memory -= size;
	dev_priv->mm.object_count -= 1;
}
151 
152 static int
i915_gem_wait_for_error(struct drm_device * dev)153 i915_gem_wait_for_error(struct drm_device *dev)
154 {
155 	struct drm_i915_private *dev_priv = dev->dev_private;
156 	int ret;
157 
158 	if (!atomic_load_acq_int(&dev_priv->mm.wedged))
159 		return (0);
160 
161 	mtx_lock(&dev_priv->error_completion_lock);
162 	while (dev_priv->error_completion == 0) {
163 		ret = -msleep(&dev_priv->error_completion,
164 		    &dev_priv->error_completion_lock, PCATCH, "915wco", 0);
165 		if (ret == -ERESTART)
166 			ret = -ERESTARTSYS;
167 		if (ret != 0) {
168 			mtx_unlock(&dev_priv->error_completion_lock);
169 			return (ret);
170 		}
171 	}
172 	mtx_unlock(&dev_priv->error_completion_lock);
173 
174 	if (atomic_load_acq_int(&dev_priv->mm.wedged)) {
175 		/* GPU is hung, bump the completion count to account for
176 		 * the token we just consumed so that we never hit zero and
177 		 * end up waiting upon a subsequent completion event that
178 		 * will never happen.
179 		 */
180 		mtx_lock(&dev_priv->error_completion_lock);
181 		dev_priv->error_completion++;
182 		mtx_unlock(&dev_priv->error_completion_lock);
183 	}
184 	return 0;
185 }
186 
i915_mutex_lock_interruptible(struct drm_device * dev)187 int i915_mutex_lock_interruptible(struct drm_device *dev)
188 {
189 	int ret;
190 
191 	ret = i915_gem_wait_for_error(dev);
192 	if (ret)
193 		return ret;
194 
195 	/*
196 	 * interruptible shall it be. might indeed be if dev_lock is
197 	 * changed to sx
198 	 */
199 	ret = -sx_xlock_sig(&dev->dev_struct_lock);
200 	if (ret)
201 		return ret;
202 
203 	return 0;
204 }
205 
206 static inline bool
i915_gem_object_is_inactive(struct drm_i915_gem_object * obj)207 i915_gem_object_is_inactive(struct drm_i915_gem_object *obj)
208 {
209 	return !obj->active;
210 }
211 
212 int
i915_gem_init_ioctl(struct drm_device * dev,void * data,struct drm_file * file)213 i915_gem_init_ioctl(struct drm_device *dev, void *data,
214 		    struct drm_file *file)
215 {
216 	struct drm_i915_gem_init *args = data;
217 	drm_i915_private_t *dev_priv = dev->dev_private;
218 	int ret;
219 
220 	if (drm_core_check_feature(dev, DRIVER_MODESET))
221 		return -ENODEV;
222 
223 	if (args->gtt_start >= args->gtt_end ||
224 	    (args->gtt_end | args->gtt_start) & (PAGE_SIZE - 1))
225 		return -EINVAL;
226 
227 	if (mtx_initialized(&dev_priv->mm.gtt_space.unused_lock))
228 		return -EBUSY;
229 
230 	/* GEM with user mode setting was never supported on ilk and later. */
231 	if (INTEL_INFO(dev)->gen >= 5)
232 		return -ENODEV;
233 
234 	/*
235 	 * XXXKIB. The second-time initialization should be guarded
236 	 * against.
237 	 */
238 	DRM_LOCK(dev);
239 	ret = i915_gem_init_global_gtt(dev, args->gtt_start,
240 				 args->gtt_end, args->gtt_end);
241 	DRM_UNLOCK(dev);
242 
243 	return ret;
244 }
245 
246 int
i915_gem_get_aperture_ioctl(struct drm_device * dev,void * data,struct drm_file * file)247 i915_gem_get_aperture_ioctl(struct drm_device *dev, void *data,
248 			    struct drm_file *file)
249 {
250 	struct drm_i915_private *dev_priv = dev->dev_private;
251 	struct drm_i915_gem_get_aperture *args = data;
252 	struct drm_i915_gem_object *obj;
253 	size_t pinned;
254 
255 	pinned = 0;
256 	DRM_LOCK(dev);
257 	list_for_each_entry(obj, &dev_priv->mm.gtt_list, gtt_list)
258 		if (obj->pin_count)
259 			pinned += obj->gtt_space->size;
260 	DRM_UNLOCK(dev);
261 
262 	args->aper_size = dev_priv->mm.gtt_total;
263 	args->aper_available_size = args->aper_size - pinned;
264 
265 	return 0;
266 }
267 
268 static int
i915_gem_create(struct drm_file * file,struct drm_device * dev,uint64_t size,uint32_t * handle_p)269 i915_gem_create(struct drm_file *file,
270 		struct drm_device *dev,
271 		uint64_t size,
272 		uint32_t *handle_p)
273 {
274 	struct drm_i915_gem_object *obj;
275 	int ret;
276 	u32 handle;
277 
278 	size = roundup(size, PAGE_SIZE);
279 	if (size == 0)
280 		return -EINVAL;
281 
282 	/* Allocate the new object */
283 	obj = i915_gem_alloc_object(dev, size);
284 	if (obj == NULL)
285 		return -ENOMEM;
286 
287 	ret = drm_gem_handle_create(file, &obj->base, &handle);
288 	if (ret) {
289 		drm_gem_object_release(&obj->base);
290 		i915_gem_info_remove_obj(dev->dev_private, obj->base.size);
291 		free(obj, DRM_I915_GEM);
292 		return ret;
293 	}
294 
295 	/* drop reference from allocate - handle holds it now */
296 	drm_gem_object_unreference(&obj->base);
297 	CTR2(KTR_DRM, "object_create %p %x", obj, size);
298 
299 	*handle_p = handle;
300 	return 0;
301 }
302 
303 int
i915_gem_dumb_create(struct drm_file * file,struct drm_device * dev,struct drm_mode_create_dumb * args)304 i915_gem_dumb_create(struct drm_file *file,
305 		     struct drm_device *dev,
306 		     struct drm_mode_create_dumb *args)
307 {
308 	/* have to work out size/pitch and return them */
309 	args->pitch = roundup2(args->width * ((args->bpp + 7) / 8), 64);
310 	args->size = args->pitch * args->height;
311 	return i915_gem_create(file, dev,
312 			       args->size, &args->handle);
313 }
314 
/*
 * Destroy a dumb buffer by deleting its handle from `file`.
 * Returns the result of drm_gem_handle_delete(); `dev` is unused.
 */
int
i915_gem_dumb_destroy(struct drm_file *file, struct drm_device *dev,
    uint32_t handle)
{
	int error;

	error = drm_gem_handle_delete(file, handle);

	return (error);
}
321 
322 /**
323  * Creates a new mm object and returns a handle to it.
324  */
325 int
i915_gem_create_ioctl(struct drm_device * dev,void * data,struct drm_file * file)326 i915_gem_create_ioctl(struct drm_device *dev, void *data,
327 		      struct drm_file *file)
328 {
329 	struct drm_i915_gem_create *args = data;
330 
331 	return i915_gem_create(file, dev,
332 			       args->size, &args->handle);
333 }
334 
i915_gem_object_needs_bit17_swizzle(struct drm_i915_gem_object * obj)335 static int i915_gem_object_needs_bit17_swizzle(struct drm_i915_gem_object *obj)
336 {
337 	drm_i915_private_t *dev_priv = obj->base.dev->dev_private;
338 
339 	return dev_priv->mm.bit_6_swizzle_x == I915_BIT_6_SWIZZLE_9_10_17 &&
340 		obj->tiling_mode != I915_TILING_NONE;
341 }
342 
343 static inline int
__copy_to_user_inatomic(void __user * to,const void * from,unsigned n)344 __copy_to_user_inatomic(void __user *to, const void *from, unsigned n)
345 {
346 	return (copyout_nofault(from, to, n) != 0 ? n : 0);
347 }
348 static inline unsigned long
__copy_from_user_inatomic_nocache(void * to,const void __user * from,unsigned long n)349 __copy_from_user_inatomic_nocache(void *to, const void __user *from,
350     unsigned long n)
351 {
352 
353 	/*
354 	 * XXXKIB.  Equivalent Linux function is implemented using
355 	 * MOVNTI for aligned moves.  For unaligned head and tail,
356 	 * normal move is performed.  As such, it is not incorrect, if
357 	 * only somewhat slower, to use normal copyin.  All uses
358 	 * except shmem_pwrite_fast() have the destination mapped WC.
359 	 */
360 	return ((copyin_nofault(__DECONST(void *, from), to, n) != 0 ? n : 0));
361 }
362 static inline int
fault_in_multipages_readable(const char __user * uaddr,int size)363 fault_in_multipages_readable(const char __user *uaddr, int size)
364 {
365 	char c;
366 	int ret = 0;
367 	const char __user *end = uaddr + size - 1;
368 
369 	if (unlikely(size == 0))
370 		return ret;
371 
372 	while (uaddr <= end) {
373 		ret = -copyin(uaddr, &c, 1);
374 		if (ret != 0)
375 			return -EFAULT;
376 		uaddr += PAGE_SIZE;
377 	}
378 
379 	/* Check whether the range spilled into the next page. */
380 	if (((unsigned long)uaddr & ~PAGE_MASK) ==
381 			((unsigned long)end & ~PAGE_MASK)) {
382 		ret = -copyin(end, &c, 1);
383 	}
384 
385 	return ret;
386 }
387 
388 static inline int
fault_in_multipages_writeable(char __user * uaddr,int size)389 fault_in_multipages_writeable(char __user *uaddr, int size)
390 {
391 	int ret = 0;
392 	char __user *end = uaddr + size - 1;
393 
394 	if (unlikely(size == 0))
395 		return ret;
396 
397 	/*
398 	 * Writing zeroes into userspace here is OK, because we know that if
399 	 * the zero gets there, we'll be overwriting it.
400 	 */
401 	while (uaddr <= end) {
402 		ret = subyte(uaddr, 0);
403 		if (ret != 0)
404 			return -EFAULT;
405 		uaddr += PAGE_SIZE;
406 	}
407 
408 	/* Check whether the range spilled into the next page. */
409 	if (((unsigned long)uaddr & ~PAGE_MASK) ==
410 			((unsigned long)end & ~PAGE_MASK))
411 		ret = subyte(end, 0);
412 
413 	return ret;
414 }
415 
416 static inline int
__copy_to_user_swizzled(char __user * cpu_vaddr,const char * gpu_vaddr,int gpu_offset,int length)417 __copy_to_user_swizzled(char __user *cpu_vaddr,
418 			const char *gpu_vaddr, int gpu_offset,
419 			int length)
420 {
421 	int ret, cpu_offset = 0;
422 
423 	while (length > 0) {
424 		int cacheline_end = roundup2(gpu_offset + 1, 64);
425 		int this_length = min(cacheline_end - gpu_offset, length);
426 		int swizzled_gpu_offset = gpu_offset ^ 64;
427 
428 		ret = __copy_to_user(cpu_vaddr + cpu_offset,
429 				     gpu_vaddr + swizzled_gpu_offset,
430 				     this_length);
431 		if (ret)
432 			return ret + length;
433 
434 		cpu_offset += this_length;
435 		gpu_offset += this_length;
436 		length -= this_length;
437 	}
438 
439 	return 0;
440 }
441 
442 static inline int
__copy_from_user_swizzled(char * gpu_vaddr,int gpu_offset,const char __user * cpu_vaddr,int length)443 __copy_from_user_swizzled(char *gpu_vaddr, int gpu_offset,
444 			  const char __user *cpu_vaddr,
445 			  int length)
446 {
447 	int ret, cpu_offset = 0;
448 
449 	while (length > 0) {
450 		int cacheline_end = roundup2(gpu_offset + 1, 64);
451 		int this_length = min(cacheline_end - gpu_offset, length);
452 		int swizzled_gpu_offset = gpu_offset ^ 64;
453 
454 		ret = __copy_from_user(gpu_vaddr + swizzled_gpu_offset,
455 				       cpu_vaddr + cpu_offset,
456 				       this_length);
457 		if (ret)
458 			return ret + length;
459 
460 		cpu_offset += this_length;
461 		gpu_offset += this_length;
462 		length -= this_length;
463 	}
464 
465 	return 0;
466 }
467 
468 /* Per-page copy function for the shmem pread fastpath.
469  * Flushes invalid cachelines before reading the target if
470  * needs_clflush is set. */
471 static int
shmem_pread_fast(vm_page_t page,int shmem_page_offset,int page_length,char __user * user_data,bool page_do_bit17_swizzling,bool needs_clflush)472 shmem_pread_fast(vm_page_t page, int shmem_page_offset, int page_length,
473 		 char __user *user_data,
474 		 bool page_do_bit17_swizzling, bool needs_clflush)
475 {
476 	char *vaddr;
477 	struct sf_buf *sf;
478 	int ret;
479 
480 	if (unlikely(page_do_bit17_swizzling))
481 		return -EINVAL;
482 
483 	sched_pin();
484 	sf = sf_buf_alloc(page, SFB_NOWAIT | SFB_CPUPRIVATE);
485 	if (sf == NULL) {
486 		sched_unpin();
487 		return (-EFAULT);
488 	}
489 	vaddr = (char *)sf_buf_kva(sf);
490 	if (needs_clflush)
491 		drm_clflush_virt_range(vaddr + shmem_page_offset,
492 				       page_length);
493 	ret = __copy_to_user_inatomic(user_data,
494 				      vaddr + shmem_page_offset,
495 				      page_length);
496 	sf_buf_free(sf);
497 	sched_unpin();
498 
499 	return ret ? -EFAULT : 0;
500 }
501 
502 static void
shmem_clflush_swizzled_range(char * addr,unsigned long length,bool swizzled)503 shmem_clflush_swizzled_range(char *addr, unsigned long length,
504 			     bool swizzled)
505 {
506 	if (unlikely(swizzled)) {
507 		unsigned long start = (unsigned long) addr;
508 		unsigned long end = (unsigned long) addr + length;
509 
510 		/* For swizzling simply ensure that we always flush both
511 		 * channels. Lame, but simple and it works. Swizzled
512 		 * pwrite/pread is far from a hotpath - current userspace
513 		 * doesn't use it at all. */
514 		start = rounddown2(start, 128);
515 		end = roundup2(end, 128);
516 
517 		drm_clflush_virt_range((void *)start, end - start);
518 	} else {
519 		drm_clflush_virt_range(addr, length);
520 	}
521 
522 }
523 
524 /* Only difference to the fast-path function is that this can handle bit17
525  * and uses non-atomic copy and kmap functions. */
526 static int
shmem_pread_slow(vm_page_t page,int shmem_page_offset,int page_length,char __user * user_data,bool page_do_bit17_swizzling,bool needs_clflush)527 shmem_pread_slow(vm_page_t page, int shmem_page_offset, int page_length,
528 		 char __user *user_data,
529 		 bool page_do_bit17_swizzling, bool needs_clflush)
530 {
531 	char *vaddr;
532 	struct sf_buf *sf;
533 	int ret;
534 
535 	sf = sf_buf_alloc(page, 0);
536 	vaddr = (char *)sf_buf_kva(sf);
537 	if (needs_clflush)
538 		shmem_clflush_swizzled_range(vaddr + shmem_page_offset,
539 					     page_length,
540 					     page_do_bit17_swizzling);
541 
542 	if (page_do_bit17_swizzling)
543 		ret = __copy_to_user_swizzled(user_data,
544 					      vaddr, shmem_page_offset,
545 					      page_length);
546 	else
547 		ret = __copy_to_user(user_data,
548 				     vaddr + shmem_page_offset,
549 				     page_length);
550 	sf_buf_free(sf);
551 
552 	return ret ? - EFAULT : 0;
553 }
554 
555 static int
i915_gem_shmem_pread(struct drm_device * dev,struct drm_i915_gem_object * obj,struct drm_i915_gem_pread * args,struct drm_file * file)556 i915_gem_shmem_pread(struct drm_device *dev,
557 		     struct drm_i915_gem_object *obj,
558 		     struct drm_i915_gem_pread *args,
559 		     struct drm_file *file)
560 {
561 	char __user *user_data;
562 	ssize_t remain, sremain;
563 	off_t offset, soffset;
564 	int shmem_page_offset, page_length, ret = 0;
565 	int obj_do_bit17_swizzling, page_do_bit17_swizzling;
566 	int prefaulted = 0;
567 	int needs_clflush = 0;
568 
569 	user_data = to_user_ptr(args->data_ptr);
570 	sremain = remain = args->size;
571 
572 	obj_do_bit17_swizzling = i915_gem_object_needs_bit17_swizzle(obj);
573 
574 	if (!(obj->base.read_domains & I915_GEM_DOMAIN_CPU)) {
575 		/* If we're not in the cpu read domain, set ourself into the gtt
576 		 * read domain and manually flush cachelines (if required). This
577 		 * optimizes for the case when the gpu will dirty the data
578 		 * anyway again before the next pread happens. */
579 		needs_clflush = !cpu_cache_is_coherent(dev, obj->cache_level);
580 		ret = i915_gem_object_set_to_gtt_domain(obj, false);
581 		if (ret)
582 			return ret;
583 	}
584 
585 	soffset = offset = args->offset;
586 	ret = i915_gem_object_get_pages_range(obj, soffset, soffset + sremain);
587 	if (ret)
588 		return ret;
589 
590 	i915_gem_object_pin_pages(obj);
591 
592 	VM_OBJECT_WLOCK(obj->base.vm_obj);
593 	for (vm_page_t page = vm_page_find_least(obj->base.vm_obj,
594 	    OFF_TO_IDX(offset));; page = vm_page_next(page)) {
595 		VM_OBJECT_WUNLOCK(obj->base.vm_obj);
596 
597 		if (remain <= 0)
598 			break;
599 
600 		/* Operation in this page
601 		 *
602 		 * shmem_page_offset = offset within page in shmem file
603 		 * page_length = bytes to copy for this page
604 		 */
605 		shmem_page_offset = offset_in_page(offset);
606 		page_length = remain;
607 		if ((shmem_page_offset + page_length) > PAGE_SIZE)
608 			page_length = PAGE_SIZE - shmem_page_offset;
609 
610 		page_do_bit17_swizzling = obj_do_bit17_swizzling &&
611 			(page_to_phys(page) & (1 << 17)) != 0;
612 
613 		ret = shmem_pread_fast(page, shmem_page_offset, page_length,
614 				       user_data, page_do_bit17_swizzling,
615 				       needs_clflush);
616 		if (ret == 0)
617 			goto next_page;
618 
619 		DRM_UNLOCK(dev);
620 
621 		if (likely(!i915_prefault_disable) && !prefaulted) {
622 			ret = fault_in_multipages_writeable(user_data, remain);
623 			/* Userspace is tricking us, but we've already clobbered
624 			 * its pages with the prefault and promised to write the
625 			 * data up to the first fault. Hence ignore any errors
626 			 * and just continue. */
627 			(void)ret;
628 			prefaulted = 1;
629 		}
630 
631 		ret = shmem_pread_slow(page, shmem_page_offset, page_length,
632 				       user_data, page_do_bit17_swizzling,
633 				       needs_clflush);
634 
635 		DRM_LOCK(dev);
636 
637 next_page:
638 		vm_page_reference(page);
639 
640 		if (ret)
641 			goto out;
642 
643 		remain -= page_length;
644 		user_data += page_length;
645 		offset += page_length;
646 		VM_OBJECT_WLOCK(obj->base.vm_obj);
647 	}
648 
649 out:
650 	i915_gem_object_unpin_pages(obj);
651 	i915_gem_object_put_pages_range(obj, soffset, soffset + sremain);
652 
653 	return ret;
654 }
655 
656 /**
657  * Reads data from the object referenced by handle.
658  *
659  * On error, the contents of *data are undefined.
660  */
661 int
i915_gem_pread_ioctl(struct drm_device * dev,void * data,struct drm_file * file)662 i915_gem_pread_ioctl(struct drm_device *dev, void *data,
663 		     struct drm_file *file)
664 {
665 	struct drm_i915_gem_pread *args = data;
666 	struct drm_i915_gem_object *obj;
667 	int ret = 0;
668 
669 	if (args->size == 0)
670 		return 0;
671 
672 	if (!useracc(to_user_ptr(args->data_ptr), args->size, VM_PROT_WRITE))
673 		return -EFAULT;
674 
675 	ret = i915_mutex_lock_interruptible(dev);
676 	if (ret)
677 		return ret;
678 
679 	obj = to_intel_bo(drm_gem_object_lookup(dev, file, args->handle));
680 	if (&obj->base == NULL) {
681 		ret = -ENOENT;
682 		goto unlock;
683 	}
684 
685 	/* Bounds check source.  */
686 	if (args->offset > obj->base.size ||
687 	    args->size > obj->base.size - args->offset) {
688 		ret = -EINVAL;
689 		goto out;
690 	}
691 
692 #if 1
693 	KIB_NOTYET();
694 #else
695 	/* prime objects have no backing filp to GEM pread/pwrite
696 	 * pages from.
697 	 */
698 	if (!obj->base.filp) {
699 		ret = -EINVAL;
700 		goto out;
701 	}
702 #endif
703 
704 	CTR3(KTR_DRM, "pread %p %jx %jx", obj, args->offset, args->size);
705 
706 	ret = i915_gem_shmem_pread(dev, obj, args, file);
707 
708 out:
709 	drm_gem_object_unreference(&obj->base);
710 unlock:
711 	DRM_UNLOCK(dev);
712 	return ret;
713 }
714 
715 /* This is the fast write path which cannot handle
716  * page faults in the source data
717  */
718 
719 static inline int
fast_user_write(struct drm_device * dev,off_t page_base,int page_offset,char __user * user_data,int length)720 fast_user_write(struct drm_device *dev,
721 		off_t page_base, int page_offset,
722 		char __user *user_data,
723 		int length)
724 {
725 	void __iomem *vaddr_atomic;
726 	void *vaddr;
727 	unsigned long unwritten;
728 
729 	vaddr_atomic = pmap_mapdev_attr(dev->agp->base + page_base,
730 	    length, PAT_WRITE_COMBINING);
731 	/* We can use the cpu mem copy function because this is X86. */
732 	vaddr = (char __force*)vaddr_atomic + page_offset;
733 	unwritten = __copy_from_user_inatomic_nocache(vaddr,
734 						      user_data, length);
735 	pmap_unmapdev((vm_offset_t)vaddr_atomic, length);
736 	return unwritten;
737 }
738 
739 /**
740  * This is the fast pwrite path, where we copy the data directly from the
741  * user into the GTT, uncached.
742  */
743 static int
i915_gem_gtt_pwrite_fast(struct drm_device * dev,struct drm_i915_gem_object * obj,struct drm_i915_gem_pwrite * args,struct drm_file * file)744 i915_gem_gtt_pwrite_fast(struct drm_device *dev,
745 			 struct drm_i915_gem_object *obj,
746 			 struct drm_i915_gem_pwrite *args,
747 			 struct drm_file *file)
748 {
749 	ssize_t remain;
750 	off_t offset, page_base;
751 	char __user *user_data;
752 	int page_offset, page_length, ret;
753 
754 	ret = i915_gem_object_pin(obj, 0, true);
755 	/* XXXKIB ret = i915_gem_obj_ggtt_pin(obj, 0, true, true); */
756 	if (ret)
757 		goto out;
758 
759 	ret = i915_gem_object_set_to_gtt_domain(obj, true);
760 	if (ret)
761 		goto out_unpin;
762 
763 	ret = i915_gem_object_put_fence(obj);
764 	if (ret)
765 		goto out_unpin;
766 
767 	user_data = to_user_ptr(args->data_ptr);
768 	remain = args->size;
769 
770 	offset = obj->gtt_offset + args->offset;
771 
772 	while (remain > 0) {
773 		/* Operation in this page
774 		 *
775 		 * page_base = page offset within aperture
776 		 * page_offset = offset within page
777 		 * page_length = bytes to copy for this page
778 		 */
779 		page_base = offset & ~PAGE_MASK;
780 		page_offset = offset_in_page(offset);
781 		page_length = remain;
782 		if ((page_offset + remain) > PAGE_SIZE)
783 			page_length = PAGE_SIZE - page_offset;
784 
785 		/* If we get a fault while copying data, then (presumably) our
786 		 * source page isn't available.  Return the error and we'll
787 		 * retry in the slow path.
788 		 */
789 		if (fast_user_write(dev, page_base,
790 				    page_offset, user_data, page_length)) {
791 			ret = -EFAULT;
792 			goto out_unpin;
793 		}
794 
795 		remain -= page_length;
796 		user_data += page_length;
797 		offset += page_length;
798 	}
799 
800 out_unpin:
801 	i915_gem_object_unpin(obj);
802 out:
803 	return ret;
804 }
805 
806 /* Per-page copy function for the shmem pwrite fastpath.
807  * Flushes invalid cachelines before writing to the target if
808  * needs_clflush_before is set and flushes out any written cachelines after
809  * writing if needs_clflush is set. */
810 static int
shmem_pwrite_fast(vm_page_t page,int shmem_page_offset,int page_length,char __user * user_data,bool page_do_bit17_swizzling,bool needs_clflush_before,bool needs_clflush_after)811 shmem_pwrite_fast(vm_page_t page, int shmem_page_offset, int page_length,
812 		  char __user *user_data,
813 		  bool page_do_bit17_swizzling,
814 		  bool needs_clflush_before,
815 		  bool needs_clflush_after)
816 {
817 	char *vaddr;
818 	struct sf_buf *sf;
819 	int ret;
820 
821 	if (unlikely(page_do_bit17_swizzling))
822 		return -EINVAL;
823 
824 	sched_pin();
825 	sf = sf_buf_alloc(page, SFB_NOWAIT | SFB_CPUPRIVATE);
826 	if (sf == NULL) {
827 		sched_unpin();
828 		return (-EFAULT);
829 	}
830 	vaddr = (char *)sf_buf_kva(sf);
831 	if (needs_clflush_before)
832 		drm_clflush_virt_range(vaddr + shmem_page_offset,
833 				       page_length);
834 	ret = __copy_from_user_inatomic_nocache(vaddr + shmem_page_offset,
835 						user_data,
836 						page_length);
837 	if (needs_clflush_after)
838 		drm_clflush_virt_range(vaddr + shmem_page_offset,
839 				       page_length);
840 	sf_buf_free(sf);
841 	sched_unpin();
842 
843 	return ret ? -EFAULT : 0;
844 }
845 
846 /* Only difference to the fast-path function is that this can handle bit17
847  * and uses non-atomic copy and kmap functions. */
848 static int
shmem_pwrite_slow(vm_page_t page,int shmem_page_offset,int page_length,char __user * user_data,bool page_do_bit17_swizzling,bool needs_clflush_before,bool needs_clflush_after)849 shmem_pwrite_slow(vm_page_t page, int shmem_page_offset, int page_length,
850 		  char __user *user_data,
851 		  bool page_do_bit17_swizzling,
852 		  bool needs_clflush_before,
853 		  bool needs_clflush_after)
854 {
855 	char *vaddr;
856 	struct sf_buf *sf;
857 	int ret;
858 
859 	sf = sf_buf_alloc(page, 0);
860 	vaddr = (char *)sf_buf_kva(sf);
861 	if (unlikely(needs_clflush_before || page_do_bit17_swizzling))
862 		shmem_clflush_swizzled_range(vaddr + shmem_page_offset,
863 					     page_length,
864 					     page_do_bit17_swizzling);
865 	if (page_do_bit17_swizzling)
866 		ret = __copy_from_user_swizzled(vaddr, shmem_page_offset,
867 						user_data,
868 						page_length);
869 	else
870 		ret = __copy_from_user(vaddr + shmem_page_offset,
871 				       user_data,
872 				       page_length);
873 	if (needs_clflush_after)
874 		shmem_clflush_swizzled_range(vaddr + shmem_page_offset,
875 					     page_length,
876 					     page_do_bit17_swizzling);
877 	sf_buf_free(sf);
878 
879 	return ret ? -EFAULT : 0;
880 }
881 
882 static int
i915_gem_shmem_pwrite(struct drm_device * dev,struct drm_i915_gem_object * obj,struct drm_i915_gem_pwrite * args,struct drm_file * file)883 i915_gem_shmem_pwrite(struct drm_device *dev,
884 		      struct drm_i915_gem_object *obj,
885 		      struct drm_i915_gem_pwrite *args,
886 		      struct drm_file *file)
887 {
888 	ssize_t remain, sremain;
889 	off_t offset, soffset;
890 	char __user *user_data;
891 	int shmem_page_offset, page_length, ret = 0;
892 	int obj_do_bit17_swizzling, page_do_bit17_swizzling;
893 	int hit_slowpath = 0;
894 	int needs_clflush_after = 0;
895 	int needs_clflush_before = 0;
896 
897 	user_data = to_user_ptr(args->data_ptr);
898 	sremain = remain = args->size;
899 
900 	obj_do_bit17_swizzling = i915_gem_object_needs_bit17_swizzle(obj);
901 
902 	if (obj->base.write_domain != I915_GEM_DOMAIN_CPU) {
903 		/* If we're not in the cpu write domain, set ourself into the gtt
904 		 * write domain and manually flush cachelines (if required). This
905 		 * optimizes for the case when the gpu will use the data
906 		 * right away and we therefore have to clflush anyway. */
907 		needs_clflush_after = cpu_write_needs_clflush(obj);
908 		ret = i915_gem_object_set_to_gtt_domain(obj, true);
909 		if (ret)
910 			return ret;
911 	}
912 	/* Same trick applies to invalidate partially written cachelines read
913 	 * before writing. */
914 	if ((obj->base.read_domains & I915_GEM_DOMAIN_CPU) == 0)
915 		needs_clflush_before =
916 			!cpu_cache_is_coherent(dev, obj->cache_level);
917 
918 	soffset = offset = args->offset;
919 	ret = i915_gem_object_get_pages_range(obj, soffset, soffset + sremain);
920 	if (ret)
921 		return ret;
922 
923 	i915_gem_object_pin_pages(obj);
924 
925 	obj->dirty = 1;
926 
927 	VM_OBJECT_WLOCK(obj->base.vm_obj);
928 	for (vm_page_t page = vm_page_find_least(obj->base.vm_obj,
929 	    OFF_TO_IDX(offset));; page = vm_page_next(page)) {
930 		VM_OBJECT_WUNLOCK(obj->base.vm_obj);
931 		int partial_cacheline_write;
932 
933 		if (remain <= 0)
934 			break;
935 
936 		/* Operation in this page
937 		 *
938 		 * shmem_page_offset = offset within page in shmem file
939 		 * page_length = bytes to copy for this page
940 		 */
941 		shmem_page_offset = offset_in_page(offset);
942 
943 		page_length = remain;
944 		if ((shmem_page_offset + page_length) > PAGE_SIZE)
945 			page_length = PAGE_SIZE - shmem_page_offset;
946 
947 		/* If we don't overwrite a cacheline completely we need to be
948 		 * careful to have up-to-date data by first clflushing. Don't
949 		 * overcomplicate things and flush the entire patch. */
950 		partial_cacheline_write = needs_clflush_before &&
951 			((shmem_page_offset | page_length)
952 				& (cpu_clflush_line_size - 1));
953 
954 		page_do_bit17_swizzling = obj_do_bit17_swizzling &&
955 			(page_to_phys(page) & (1 << 17)) != 0;
956 
957 		ret = shmem_pwrite_fast(page, shmem_page_offset, page_length,
958 					user_data, page_do_bit17_swizzling,
959 					partial_cacheline_write,
960 					needs_clflush_after);
961 		if (ret == 0)
962 			goto next_page;
963 
964 		hit_slowpath = 1;
965 		DRM_UNLOCK(dev);
966 		ret = shmem_pwrite_slow(page, shmem_page_offset, page_length,
967 					user_data, page_do_bit17_swizzling,
968 					partial_cacheline_write,
969 					needs_clflush_after);
970 
971 		DRM_LOCK(dev);
972 
973 next_page:
974 		vm_page_dirty(page);
975 		vm_page_reference(page);
976 
977 		if (ret)
978 			goto out;
979 
980 		remain -= page_length;
981 		user_data += page_length;
982 		offset += page_length;
983 		VM_OBJECT_WLOCK(obj->base.vm_obj);
984 	}
985 
986 out:
987 	i915_gem_object_unpin_pages(obj);
988 	i915_gem_object_put_pages_range(obj, soffset, soffset + sremain);
989 
990 	if (hit_slowpath) {
991 		/*
992 		 * Fixup: Flush cpu caches in case we didn't flush the dirty
993 		 * cachelines in-line while writing and the object moved
994 		 * out of the cpu write domain while we've dropped the lock.
995 		 */
996 		if (!needs_clflush_after &&
997 		    obj->base.write_domain != I915_GEM_DOMAIN_CPU) {
998 			i915_gem_clflush_object(obj);
999 			i915_gem_chipset_flush(dev);
1000 		}
1001 	}
1002 
1003 	if (needs_clflush_after)
1004 		i915_gem_chipset_flush(dev);
1005 
1006 	return ret;
1007 }
1008 
1009 /**
1010  * Writes data to the object referenced by handle.
1011  *
1012  * On error, the contents of the buffer that were to be modified are undefined.
1013  */
1014 int
i915_gem_pwrite_ioctl(struct drm_device * dev,void * data,struct drm_file * file)1015 i915_gem_pwrite_ioctl(struct drm_device *dev, void *data,
1016 		      struct drm_file *file)
1017 {
1018 	struct drm_i915_gem_pwrite *args = data;
1019 	struct drm_i915_gem_object *obj;
1020 	int ret;
1021 
1022 	if (args->size == 0)
1023 		return 0;
1024 
1025 	if (!useracc(to_user_ptr(args->data_ptr), args->size, VM_PROT_READ))
1026 		return -EFAULT;
1027 
1028 	if (likely(!i915_prefault_disable)) {
1029 		ret = fault_in_multipages_readable(to_user_ptr(args->data_ptr),
1030 						   args->size);
1031 		if (ret)
1032 			return -EFAULT;
1033 	}
1034 
1035 	ret = i915_mutex_lock_interruptible(dev);
1036 	if (ret)
1037 		return ret;
1038 
1039 	obj = to_intel_bo(drm_gem_object_lookup(dev, file, args->handle));
1040 	if (&obj->base == NULL) {
1041 		ret = -ENOENT;
1042 		goto unlock;
1043 	}
1044 
1045 	/* Bounds check destination. */
1046 	if (args->offset > obj->base.size ||
1047 	    args->size > obj->base.size - args->offset) {
1048 		ret = -EINVAL;
1049 		goto out;
1050 	}
1051 
1052 #if 1
1053 	KIB_NOTYET();
1054 #else
1055 	/* prime objects have no backing filp to GEM pread/pwrite
1056 	 * pages from.
1057 	 */
1058 	if (!obj->base.filp) {
1059 		ret = -EINVAL;
1060 		goto out;
1061 	}
1062 #endif
1063 
1064 	CTR3(KTR_DRM, "pwrite %p %jx %jx", obj, args->offset, args->size);
1065 
1066 	ret = -EFAULT;
1067 	/* We can only do the GTT pwrite on untiled buffers, as otherwise
1068 	 * it would end up going through the fenced access, and we'll get
1069 	 * different detiling behavior between reading and writing.
1070 	 * pread/pwrite currently are reading and writing from the CPU
1071 	 * perspective, requiring manual detiling by the client.
1072 	 */
1073 	if (obj->phys_obj) {
1074 		ret = i915_gem_phys_pwrite(dev, obj, args, file);
1075 		goto out;
1076 	}
1077 
1078 	if (obj->tiling_mode == I915_TILING_NONE &&
1079 	    obj->base.write_domain != I915_GEM_DOMAIN_CPU &&
1080 	    cpu_write_needs_clflush(obj)) {
1081 		ret = i915_gem_gtt_pwrite_fast(dev, obj, args, file);
1082 		/* Note that the gtt paths might fail with non-page-backed user
1083 		 * pointers (e.g. gtt mappings when moving data between
1084 		 * textures). Fallback to the shmem path in that case. */
1085 	}
1086 
1087 	if (ret == -EFAULT || ret == -ENOSPC)
1088 		ret = i915_gem_shmem_pwrite(dev, obj, args, file);
1089 
1090 out:
1091 	drm_gem_object_unreference(&obj->base);
1092 unlock:
1093 	DRM_UNLOCK(dev);
1094 	return ret;
1095 }
1096 
1097 static int
i915_gem_check_wedge(struct drm_i915_private * dev_priv)1098 i915_gem_check_wedge(struct drm_i915_private *dev_priv)
1099 {
1100 	DRM_LOCK_ASSERT(dev_priv->dev);
1101 
1102 	if (atomic_load_acq_int(&dev_priv->mm.wedged) != 0) {
1103 		bool recovery_complete;
1104 
1105 		/* Give the error handler a chance to run. */
1106 		mtx_lock(&dev_priv->error_completion_lock);
1107 		recovery_complete = (&dev_priv->error_completion) > 0;
1108 		mtx_unlock(&dev_priv->error_completion_lock);
1109 
1110 		return (recovery_complete ? -EIO : -EAGAIN);
1111 	}
1112 
1113 	return 0;
1114 }
1115 
1116 /*
1117  * Compare seqno against outstanding lazy request. Emit a request if they are
1118  * equal.
1119  */
1120 static int
i915_gem_check_olr(struct intel_ring_buffer * ring,u32 seqno)1121 i915_gem_check_olr(struct intel_ring_buffer *ring, u32 seqno)
1122 {
1123 	int ret;
1124 
1125 	DRM_LOCK_ASSERT(ring->dev);
1126 
1127 	ret = 0;
1128 	if (seqno == ring->outstanding_lazy_request) {
1129 		struct drm_i915_gem_request *request;
1130 
1131 		request = malloc(sizeof(*request), DRM_I915_GEM,
1132 		    M_WAITOK | M_ZERO);
1133 
1134 		ret = i915_add_request(ring, NULL, request);
1135 		if (ret != 0) {
1136 			free(request, DRM_I915_GEM);
1137 			return ret;
1138 		}
1139 
1140 		MPASS(seqno == request->seqno);
1141 	}
1142 	return ret;
1143 }
1144 
/*
 * Sleep until the ring reports that seqno has passed.
 *
 * The wait ends when the seqno has passed, when the GPU is flagged as
 * wedged, or when msleep() reports an error.  With interruptible set the
 * sleep uses PCATCH.
 *
 * Returns 0 if the wait ended without a sleep error (including the wedged
 * case), -ENODEV if the ring interrupt could not be acquired, or the negated
 * msleep() error with -ERESTART translated to -ERESTARTSYS.
 */
static int
__wait_seqno(struct intel_ring_buffer *ring, u32 seqno, bool interruptible)
{
	drm_i915_private_t *dev_priv = ring->dev->dev_private;
	int sleep_flags;
	int ret = 0;

	/* Fast path: already signalled, no need to arm the interrupt. */
	if (i915_seqno_passed(ring->get_seqno(ring), seqno))
		return 0;

	CTR2(KTR_DRM, "request_wait_begin %s %d", ring->name, seqno);

	mtx_lock(&dev_priv->irq_lock);
	if (!ring->irq_get(ring)) {
		mtx_unlock(&dev_priv->irq_lock);
		return -ENODEV;
	}

	sleep_flags = interruptible ? PCATCH : 0;
	for (;;) {
		/* The three exit tests are kept in their original order. */
		if (i915_seqno_passed(ring->get_seqno(ring), seqno))
			break;
		if (atomic_load_acq_int(&dev_priv->mm.wedged))
			break;
		if (ret != 0)
			break;

		/* msleep() drops and reacquires irq_lock around the sleep. */
		ret = -msleep(ring, &dev_priv->irq_lock, sleep_flags,
		    "915gwr", 0);
		if (ret == -ERESTART)
			ret = -ERESTARTSYS;
	}
	ring->irq_put(ring);
	mtx_unlock(&dev_priv->irq_lock);

	CTR3(KTR_DRM, "request_wait_end %s %d %d", ring->name, seqno, ret);

	return ret;
}
1177 
1178 /**
1179  * Waits for a sequence number to be signaled, and cleans up the
1180  * request and object lists appropriately for that event.
1181  */
1182 int
i915_wait_request(struct intel_ring_buffer * ring,uint32_t seqno)1183 i915_wait_request(struct intel_ring_buffer *ring, uint32_t seqno)
1184 {
1185 	struct drm_device *dev = ring->dev;
1186 	struct drm_i915_private *dev_priv = dev->dev_private;
1187 	int ret;
1188 
1189 	KASSERT(seqno != 0, ("Zero seqno"));
1190 
1191 	ret = i915_gem_check_wedge(dev_priv);
1192 	if (ret)
1193 		return ret;
1194 
1195 	ret = i915_gem_check_olr(ring, seqno);
1196 	if (ret)
1197 		return ret;
1198 
1199 	ret = __wait_seqno(ring, seqno, dev_priv->mm.interruptible);
1200 	if (atomic_load_acq_int(&dev_priv->mm.wedged))
1201 		ret = -EAGAIN;
1202 
1203 	return ret;
1204 }
1205 
1206 /**
1207  * Ensures that all rendering to the object has completed and the object is
1208  * safe to unbind from the GTT or access from the CPU.
1209  */
1210 static __must_check int
i915_gem_object_wait_rendering(struct drm_i915_gem_object * obj)1211 i915_gem_object_wait_rendering(struct drm_i915_gem_object *obj)
1212 {
1213 	int ret;
1214 
1215 	KASSERT((obj->base.write_domain & I915_GEM_GPU_DOMAINS) == 0,
1216 	    ("In GPU write domain"));
1217 
1218 	CTR5(KTR_DRM, "object_wait_rendering %p %s %x %d %d", obj,
1219 	    obj->ring != NULL ? obj->ring->name : "none", obj->gtt_offset,
1220 	    obj->active, obj->last_rendering_seqno);
1221 	if (obj->active) {
1222 		ret = i915_wait_request(obj->ring, obj->last_rendering_seqno);
1223 		if (ret != 0)
1224 			return (ret);
1225 		i915_gem_retire_requests_ring(obj->ring);
1226 	}
1227 
1228 	return 0;
1229 }
1230 
1231 int
i915_gem_set_domain_ioctl(struct drm_device * dev,void * data,struct drm_file * file)1232 i915_gem_set_domain_ioctl(struct drm_device *dev, void *data,
1233 			  struct drm_file *file)
1234 {
1235 	struct drm_i915_gem_set_domain *args = data;
1236 	struct drm_i915_gem_object *obj;
1237 	uint32_t read_domains = args->read_domains;
1238 	uint32_t write_domain = args->write_domain;
1239 	int ret;
1240 
1241 	/* Only handle setting domains to types used by the CPU. */
1242 	if (write_domain & I915_GEM_GPU_DOMAINS)
1243 		return -EINVAL;
1244 
1245 	if (read_domains & I915_GEM_GPU_DOMAINS)
1246 		return -EINVAL;
1247 
1248 	/* Having something in the write domain implies it's in the read
1249 	 * domain, and only that read domain.  Enforce that in the request.
1250 	 */
1251 	if (write_domain != 0 && read_domains != write_domain)
1252 		return -EINVAL;
1253 
1254 	ret = i915_mutex_lock_interruptible(dev);
1255 	if (ret)
1256 		return ret;
1257 
1258 	obj = to_intel_bo(drm_gem_object_lookup(dev, file, args->handle));
1259 	if (&obj->base == NULL) {
1260 		ret = -ENOENT;
1261 		goto unlock;
1262 	}
1263 
1264 	if (read_domains & I915_GEM_DOMAIN_GTT) {
1265 		ret = i915_gem_object_set_to_gtt_domain(obj, write_domain != 0);
1266 
1267 		/* Silently promote "you're not bound, there was nothing to do"
1268 		 * to success, since the client was just asking us to
1269 		 * make sure everything was done.
1270 		 */
1271 		if (ret == -EINVAL)
1272 			ret = 0;
1273 	} else {
1274 		ret = i915_gem_object_set_to_cpu_domain(obj, write_domain != 0);
1275 	}
1276 
1277 	drm_gem_object_unreference(&obj->base);
1278 unlock:
1279 	DRM_UNLOCK(dev);
1280 	return ret;
1281 }
1282 
1283 /**
1284  * Called when user space has done writes to this buffer
1285  */
1286 int
i915_gem_sw_finish_ioctl(struct drm_device * dev,void * data,struct drm_file * file)1287 i915_gem_sw_finish_ioctl(struct drm_device *dev, void *data,
1288 			 struct drm_file *file)
1289 {
1290 	struct drm_i915_gem_sw_finish *args = data;
1291 	struct drm_i915_gem_object *obj;
1292 	int ret = 0;
1293 
1294 	ret = i915_mutex_lock_interruptible(dev);
1295 	if (ret)
1296 		return ret;
1297 
1298 	obj = to_intel_bo(drm_gem_object_lookup(dev, file, args->handle));
1299 	if (&obj->base == NULL) {
1300 		ret = -ENOENT;
1301 		goto unlock;
1302 	}
1303 
1304 	/* Pinned buffers may be scanout, so flush the cache */
1305 	if (obj->pin_count)
1306 		i915_gem_object_flush_cpu_write_domain(obj);
1307 
1308 	drm_gem_object_unreference(&obj->base);
1309 unlock:
1310 	DRM_UNLOCK(dev);
1311 	return ret;
1312 }
1313 
1314 /**
1315  * Maps the contents of an object, returning the address it is mapped
1316  * into.
1317  *
1318  * While the mapping holds a reference on the contents of the object, it doesn't
1319  * imply a ref on the object itself.
1320  */
1321 int
i915_gem_mmap_ioctl(struct drm_device * dev,void * data,struct drm_file * file)1322 i915_gem_mmap_ioctl(struct drm_device *dev, void *data,
1323 		    struct drm_file *file)
1324 {
1325 	struct drm_i915_gem_mmap *args = data;
1326 	struct drm_gem_object *obj;
1327 	struct proc *p;
1328 	vm_map_t map;
1329 	vm_offset_t addr;
1330 	vm_size_t size;
1331 	int error, rv;
1332 
1333 	obj = drm_gem_object_lookup(dev, file, args->handle);
1334 	if (obj == NULL)
1335 		return -ENOENT;
1336 
1337 	error = 0;
1338 	if (args->size == 0)
1339 		goto out;
1340 	p = curproc;
1341 	map = &p->p_vmspace->vm_map;
1342 	size = round_page(args->size);
1343 	PROC_LOCK(p);
1344 	if (map->size + size > lim_cur_proc(p, RLIMIT_VMEM)) {
1345 		PROC_UNLOCK(p);
1346 		error = -ENOMEM;
1347 		goto out;
1348 	}
1349 	PROC_UNLOCK(p);
1350 
1351 	addr = 0;
1352 	vm_object_reference(obj->vm_obj);
1353 	rv = vm_map_find(map, obj->vm_obj, args->offset, &addr, args->size, 0,
1354 	    VMFS_OPTIMAL_SPACE, VM_PROT_READ | VM_PROT_WRITE,
1355 	    VM_PROT_READ | VM_PROT_WRITE, MAP_INHERIT_SHARE);
1356 	if (rv != KERN_SUCCESS) {
1357 		vm_object_deallocate(obj->vm_obj);
1358 		error = -vm_mmap_to_errno(rv);
1359 	} else {
1360 		args->addr_ptr = (uint64_t)addr;
1361 	}
1362 out:
1363 	drm_gem_object_unreference(obj);
1364 	return (error);
1365 }
1366 
1367 static int
i915_gem_pager_ctor(void * handle,vm_ooffset_t size,vm_prot_t prot,vm_ooffset_t foff,struct ucred * cred,u_short * color)1368 i915_gem_pager_ctor(void *handle, vm_ooffset_t size, vm_prot_t prot,
1369     vm_ooffset_t foff, struct ucred *cred, u_short *color)
1370 {
1371 
1372 	*color = 0; /* XXXKIB */
1373 	return (0);
1374 }
1375 
1376 /**
1377  * i915_gem_fault - fault a page into the GTT
1378  * vma: VMA in question
1379  * vmf: fault info
1380  *
1381  * The fault handler is set up by drm_gem_mmap() when a object is GTT mapped
1382  * from userspace.  The fault handler takes care of binding the object to
1383  * the GTT (if needed), allocating and programming a fence register (again,
1384  * only if needed based on whether the old reg is still valid or the object
1385  * is tiled) and inserting a new PTE into the faulting process.
1386  *
1387  * Note that the faulting process may involve evicting existing objects
1388  * from the GTT and/or fence registers to make room.  So performance may
1389  * suffer if the GTT working set is large or there are few fence registers
1390  * left.
1391  */
1392 
1393 int i915_intr_pf;
1394 
1395 static int
i915_gem_pager_fault(vm_object_t vm_obj,vm_ooffset_t offset,int prot,vm_page_t * mres)1396 i915_gem_pager_fault(vm_object_t vm_obj, vm_ooffset_t offset, int prot,
1397     vm_page_t *mres)
1398 {
1399 	struct drm_gem_object *gem_obj;
1400 	struct drm_i915_gem_object *obj;
1401 	struct drm_device *dev;
1402 	drm_i915_private_t *dev_priv;
1403 	vm_page_t page, oldpage;
1404 	int cause, ret;
1405 	bool write;
1406 
1407 	gem_obj = vm_obj->handle;
1408 	obj = to_intel_bo(gem_obj);
1409 	dev = obj->base.dev;
1410 	dev_priv = dev->dev_private;
1411 #if 0
1412 	write = (prot & VM_PROT_WRITE) != 0;
1413 #else
1414 	write = true;
1415 #endif
1416 	vm_object_pip_add(vm_obj, 1);
1417 
1418 	/*
1419 	 * Remove the placeholder page inserted by vm_fault() from the
1420 	 * object before dropping the object lock. If
1421 	 * i915_gem_release_mmap() is active in parallel on this gem
1422 	 * object, then it owns the drm device sx and might find the
1423 	 * placeholder already. Then, since the page is busy,
1424 	 * i915_gem_release_mmap() sleeps waiting for the busy state
1425 	 * of the page cleared. We will be not able to acquire drm
1426 	 * device lock until i915_gem_release_mmap() is able to make a
1427 	 * progress.
1428 	 */
1429 	if (*mres != NULL) {
1430 		oldpage = *mres;
1431 		vm_page_lock(oldpage);
1432 		vm_page_remove(oldpage);
1433 		vm_page_unlock(oldpage);
1434 		*mres = NULL;
1435 	} else
1436 		oldpage = NULL;
1437 	VM_OBJECT_WUNLOCK(vm_obj);
1438 retry:
1439 	cause = ret = 0;
1440 	page = NULL;
1441 
1442 	if (i915_intr_pf) {
1443 		ret = i915_mutex_lock_interruptible(dev);
1444 		if (ret != 0) {
1445 			cause = 10;
1446 			goto out;
1447 		}
1448 	} else
1449 		DRM_LOCK(dev);
1450 
1451 	/*
1452 	 * Since the object lock was dropped, other thread might have
1453 	 * faulted on the same GTT address and instantiated the
1454 	 * mapping for the page.  Recheck.
1455 	 */
1456 	VM_OBJECT_WLOCK(vm_obj);
1457 	page = vm_page_lookup(vm_obj, OFF_TO_IDX(offset));
1458 	if (page != NULL) {
1459 		if (vm_page_busied(page)) {
1460 			DRM_UNLOCK(dev);
1461 			vm_page_lock(page);
1462 			VM_OBJECT_WUNLOCK(vm_obj);
1463 			vm_page_busy_sleep(page, "915pee");
1464 			goto retry;
1465 		}
1466 		goto have_page;
1467 	} else
1468 		VM_OBJECT_WUNLOCK(vm_obj);
1469 
1470 	/* Now bind it into the GTT if needed */
1471 	if (!obj->map_and_fenceable) {
1472 		ret = i915_gem_object_unbind(obj);
1473 		if (ret != 0) {
1474 			cause = 20;
1475 			goto unlock;
1476 		}
1477 	}
1478 	if (!obj->gtt_space) {
1479 		ret = i915_gem_object_bind_to_gtt(obj, 0, true);
1480 		if (ret != 0) {
1481 			cause = 30;
1482 			goto unlock;
1483 		}
1484 
1485 		ret = i915_gem_object_set_to_gtt_domain(obj, write);
1486 		if (ret != 0) {
1487 			cause = 40;
1488 			goto unlock;
1489 		}
1490 	}
1491 
1492 	if (!obj->has_global_gtt_mapping)
1493 		i915_gem_gtt_bind_object(obj, obj->cache_level);
1494 
1495 	ret = i915_gem_object_get_fence(obj);
1496 	if (ret != 0) {
1497 		cause = 50;
1498 		goto unlock;
1499 	}
1500 
1501 	if (i915_gem_object_is_inactive(obj))
1502 		list_move_tail(&obj->mm_list, &dev_priv->mm.inactive_list);
1503 
1504 	obj->fault_mappable = true;
1505 	VM_OBJECT_WLOCK(vm_obj);
1506 	page = PHYS_TO_VM_PAGE(dev->agp->base + obj->gtt_offset + offset);
1507 	KASSERT((page->flags & PG_FICTITIOUS) != 0,
1508 	    ("physical address %#jx not fictitious",
1509 	    (uintmax_t)(dev->agp->base + obj->gtt_offset + offset)));
1510 	if (page == NULL) {
1511 		VM_OBJECT_WUNLOCK(vm_obj);
1512 		cause = 60;
1513 		ret = -EFAULT;
1514 		goto unlock;
1515 	}
1516 	KASSERT((page->flags & PG_FICTITIOUS) != 0,
1517 	    ("not fictitious %p", page));
1518 	KASSERT(page->wire_count == 1, ("wire_count not 1 %p", page));
1519 
1520 	if (vm_page_busied(page)) {
1521 		DRM_UNLOCK(dev);
1522 		vm_page_lock(page);
1523 		VM_OBJECT_WUNLOCK(vm_obj);
1524 		vm_page_busy_sleep(page, "915pbs");
1525 		goto retry;
1526 	}
1527 	if (vm_page_insert(page, vm_obj, OFF_TO_IDX(offset))) {
1528 		DRM_UNLOCK(dev);
1529 		VM_OBJECT_WUNLOCK(vm_obj);
1530 		VM_WAIT;
1531 		goto retry;
1532 	}
1533 	page->valid = VM_PAGE_BITS_ALL;
1534 have_page:
1535 	*mres = page;
1536 	vm_page_xbusy(page);
1537 
1538 	CTR4(KTR_DRM, "fault %p %jx %x phys %x", gem_obj, offset, prot,
1539 	    page->phys_addr);
1540 	DRM_UNLOCK(dev);
1541 	if (oldpage != NULL) {
1542 		vm_page_lock(oldpage);
1543 		vm_page_free(oldpage);
1544 		vm_page_unlock(oldpage);
1545 	}
1546 	vm_object_pip_wakeup(vm_obj);
1547 	return (VM_PAGER_OK);
1548 
1549 unlock:
1550 	DRM_UNLOCK(dev);
1551 out:
1552 	KASSERT(ret != 0, ("i915_gem_pager_fault: wrong return"));
1553 	CTR5(KTR_DRM, "fault_fail %p %jx %x err %d %d", gem_obj, offset, prot,
1554 	    -ret, cause);
1555 	if (ret == -EAGAIN || ret == -EIO || ret == -EINTR) {
1556 		kern_yield(PRI_USER);
1557 		goto retry;
1558 	}
1559 	VM_OBJECT_WLOCK(vm_obj);
1560 	vm_object_pip_wakeup(vm_obj);
1561 	return (VM_PAGER_ERROR);
1562 }
1563 
1564 static void
i915_gem_pager_dtor(void * handle)1565 i915_gem_pager_dtor(void *handle)
1566 {
1567 	struct drm_gem_object *obj;
1568 	struct drm_device *dev;
1569 
1570 	obj = handle;
1571 	dev = obj->dev;
1572 
1573 	DRM_LOCK(dev);
1574 	drm_gem_free_mmap_offset(obj);
1575 	i915_gem_release_mmap(to_intel_bo(obj));
1576 	drm_gem_object_unreference(obj);
1577 	DRM_UNLOCK(dev);
1578 }
1579 
1580 struct cdev_pager_ops i915_gem_pager_ops = {
1581 	.cdev_pg_fault	= i915_gem_pager_fault,
1582 	.cdev_pg_ctor	= i915_gem_pager_ctor,
1583 	.cdev_pg_dtor	= i915_gem_pager_dtor
1584 };
1585 
1586 /**
1587  * i915_gem_release_mmap - remove physical page mappings
1588  * @obj: obj in question
1589  *
1590  * Preserve the reservation of the mmapping with the DRM core code, but
1591  * relinquish ownership of the pages back to the system.
1592  *
1593  * It is vital that we remove the page mapping if we have mapped a tiled
1594  * object through the GTT and then lose the fence register due to
1595  * resource pressure. Similarly if the object has been moved out of the
1596  * aperture, than pages mapped into userspace must be revoked. Removing the
1597  * mapping will then trigger a page fault on the next user access, allowing
1598  * fixup by i915_gem_fault().
1599  */
1600 void
i915_gem_release_mmap(struct drm_i915_gem_object * obj)1601 i915_gem_release_mmap(struct drm_i915_gem_object *obj)
1602 {
1603 	vm_object_t devobj;
1604 	vm_page_t page;
1605 	int i, page_count;
1606 
1607 	if (!obj->fault_mappable)
1608 		return;
1609 
1610 	CTR3(KTR_DRM, "release_mmap %p %x %x", obj, obj->gtt_offset,
1611 	    OFF_TO_IDX(obj->base.size));
1612 	devobj = cdev_pager_lookup(obj);
1613 	if (devobj != NULL) {
1614 		page_count = OFF_TO_IDX(obj->base.size);
1615 
1616 		VM_OBJECT_WLOCK(devobj);
1617 retry:
1618 		for (i = 0; i < page_count; i++) {
1619 			page = vm_page_lookup(devobj, i);
1620 			if (page == NULL)
1621 				continue;
1622 			if (vm_page_sleep_if_busy(page, "915unm"))
1623 				goto retry;
1624 			cdev_pager_free_page(devobj, page);
1625 		}
1626 		VM_OBJECT_WUNLOCK(devobj);
1627 		vm_object_deallocate(devobj);
1628 	}
1629 
1630 	obj->fault_mappable = false;
1631 }
1632 
1633 static uint32_t
i915_gem_get_gtt_size(struct drm_device * dev,uint32_t size,int tiling_mode)1634 i915_gem_get_gtt_size(struct drm_device *dev, uint32_t size, int tiling_mode)
1635 {
1636 	uint32_t gtt_size;
1637 
1638 	if (INTEL_INFO(dev)->gen >= 4 ||
1639 	    tiling_mode == I915_TILING_NONE)
1640 		return size;
1641 
1642 	/* Previous chips need a power-of-two fence region when tiling */
1643 	if (INTEL_INFO(dev)->gen == 3)
1644 		gtt_size = 1024*1024;
1645 	else
1646 		gtt_size = 512*1024;
1647 
1648 	while (gtt_size < size)
1649 		gtt_size <<= 1;
1650 
1651 	return gtt_size;
1652 }
1653 
1654 /**
1655  * i915_gem_get_gtt_alignment - return required GTT alignment for an object
1656  * @obj: object to check
1657  *
1658  * Return the required GTT alignment for an object, taking into account
1659  * potential fence register mapping.
1660  */
1661 static uint32_t
i915_gem_get_gtt_alignment(struct drm_device * dev,uint32_t size,int tiling_mode)1662 i915_gem_get_gtt_alignment(struct drm_device *dev,
1663 			   uint32_t size,
1664 			   int tiling_mode)
1665 {
1666 	/*
1667 	 * Minimum alignment is 4k (GTT page size), but might be greater
1668 	 * if a fence register is needed for the object.
1669 	 */
1670 	if (INTEL_INFO(dev)->gen >= 4 ||
1671 	    tiling_mode == I915_TILING_NONE)
1672 		return 4096;
1673 
1674 	/*
1675 	 * Previous chips need to be aligned to the size of the smallest
1676 	 * fence register that can contain the object.
1677 	 */
1678 	return i915_gem_get_gtt_size(dev, size, tiling_mode);
1679 }
1680 
1681 /**
1682  * i915_gem_get_unfenced_gtt_alignment - return required GTT alignment for an
1683  *					 unfenced object
1684  * @dev: the device
1685  * @size: size of the object
1686  * @tiling_mode: tiling mode of the object
1687  *
1688  * Return the required GTT alignment for an object, only taking into account
1689  * unfenced tiled surface requirements.
1690  */
1691 uint32_t
i915_gem_get_unfenced_gtt_alignment(struct drm_device * dev,uint32_t size,int tiling_mode)1692 i915_gem_get_unfenced_gtt_alignment(struct drm_device *dev,
1693 				    uint32_t size,
1694 				    int tiling_mode)
1695 {
1696 	/*
1697 	 * Minimum alignment is 4k (GTT page size) for sane hw.
1698 	 */
1699 	if (INTEL_INFO(dev)->gen >= 4 || IS_G33(dev) ||
1700 	    tiling_mode == I915_TILING_NONE)
1701 		return 4096;
1702 
1703 	/* Previous hardware however needs to be aligned to a power-of-two
1704 	 * tile height. The simplest method for determining this is to reuse
1705 	 * the power-of-tile object size.
1706 	 */
1707 	return i915_gem_get_gtt_size(dev, size, tiling_mode);
1708 }
1709 
1710 int
i915_gem_mmap_gtt(struct drm_file * file,struct drm_device * dev,uint32_t handle,uint64_t * offset)1711 i915_gem_mmap_gtt(struct drm_file *file,
1712 		  struct drm_device *dev,
1713 		  uint32_t handle,
1714 		  uint64_t *offset)
1715 {
1716 	struct drm_i915_private *dev_priv = dev->dev_private;
1717 	struct drm_i915_gem_object *obj;
1718 	int ret;
1719 
1720 	ret = i915_mutex_lock_interruptible(dev);
1721 	if (ret)
1722 		return ret;
1723 
1724 	obj = to_intel_bo(drm_gem_object_lookup(dev, file, handle));
1725 	if (&obj->base == NULL) {
1726 		ret = -ENOENT;
1727 		goto unlock;
1728 	}
1729 
1730 	if (obj->base.size > dev_priv->mm.gtt_mappable_end) {
1731 		ret = -E2BIG;
1732 		goto out;
1733 	}
1734 
1735 	if (obj->madv != I915_MADV_WILLNEED) {
1736 		DRM_ERROR("Attempting to mmap a purgeable buffer\n");
1737 		ret = -EINVAL;
1738 		goto out;
1739 	}
1740 
1741 	ret = drm_gem_create_mmap_offset(&obj->base);
1742 	if (ret)
1743 		goto out;
1744 
1745 	*offset = DRM_GEM_MAPPING_OFF(obj->base.map_list.key) |
1746 	    DRM_GEM_MAPPING_KEY;
1747 
1748 out:
1749 	drm_gem_object_unreference(&obj->base);
1750 unlock:
1751 	DRM_UNLOCK(dev);
1752 	return ret;
1753 }
1754 
1755 /**
1756  * i915_gem_mmap_gtt_ioctl - prepare an object for GTT mmap'ing
1757  * @dev: DRM device
1758  * @data: GTT mapping ioctl data
1759  * @file: GEM object info
1760  *
1761  * Simply returns the fake offset to userspace so it can mmap it.
1762  * The mmap call will end up in drm_gem_mmap(), which will set things
1763  * up so we can get faults in the handler above.
1764  *
1765  * The fault handler will take care of binding the object into the GTT
1766  * (since it may have been evicted to make room for something), allocating
1767  * a fence register, and mapping the appropriate aperture address into
1768  * userspace.
1769  */
1770 int
i915_gem_mmap_gtt_ioctl(struct drm_device * dev,void * data,struct drm_file * file)1771 i915_gem_mmap_gtt_ioctl(struct drm_device *dev, void *data,
1772 			struct drm_file *file)
1773 {
1774 	struct drm_i915_gem_mmap_gtt *args = data;
1775 
1776 	return i915_gem_mmap_gtt(file, dev, args->handle, &args->offset);
1777 }
1778 
1779 /* Immediately discard the backing storage */
1780 static void
i915_gem_object_truncate(struct drm_i915_gem_object * obj)1781 i915_gem_object_truncate(struct drm_i915_gem_object *obj)
1782 {
1783 	vm_object_t vm_obj;
1784 
1785 	vm_obj = obj->base.vm_obj;
1786 	VM_OBJECT_WLOCK(vm_obj);
1787 	vm_object_page_remove(vm_obj, 0, 0, false);
1788 	VM_OBJECT_WUNLOCK(vm_obj);
1789 	drm_gem_free_mmap_offset(&obj->base);
1790 	obj->madv = I915_MADV_PURGED_INTERNAL;
1791 }
1792 
1793 static inline int
i915_gem_object_is_purgeable(struct drm_i915_gem_object * obj)1794 i915_gem_object_is_purgeable(struct drm_i915_gem_object *obj)
1795 {
1796 	return obj->madv == I915_MADV_DONTNEED;
1797 }
1798 
1799 static void
i915_gem_object_put_pages_range_locked(struct drm_i915_gem_object * obj,vm_pindex_t si,vm_pindex_t ei)1800 i915_gem_object_put_pages_range_locked(struct drm_i915_gem_object *obj,
1801     vm_pindex_t si, vm_pindex_t ei)
1802 {
1803 	vm_object_t vm_obj;
1804 	vm_page_t page;
1805 	vm_pindex_t i;
1806 
1807 	vm_obj = obj->base.vm_obj;
1808 	VM_OBJECT_ASSERT_LOCKED(vm_obj);
1809 	for (i = si,  page = vm_page_lookup(vm_obj, i); i < ei;
1810 	    page = vm_page_next(page), i++) {
1811 		KASSERT(page->pindex == i, ("pindex %jx %jx",
1812 		    (uintmax_t)page->pindex, (uintmax_t)i));
1813 		vm_page_lock(page);
1814 		vm_page_unwire(page, PQ_INACTIVE);
1815 		if (page->wire_count == 0)
1816 			atomic_add_long(&i915_gem_wired_pages_cnt, -1);
1817 		vm_page_unlock(page);
1818 	}
1819 }
1820 
/* Set to 1 to compile in the (expensive) GTT consistency check below. */
#define	GEM_PARANOID_CHECK_GTT 0
#if GEM_PARANOID_CHECK_GTT
/*
 * Debugging aid: panic if any of the page_count pages in ma[] is still
 * referenced by a GTT PTE in the range [mm.gtt_start, mm.gtt_end).
 *
 * Every PTE in the range is compared against every page, so the cost is
 * proportional to the number of PTEs times page_count.
 */
static void
i915_gem_assert_pages_not_mapped(struct drm_device *dev, vm_page_t *ma,
    int page_count)
{
	struct drm_i915_private *dev_priv;
	vm_paddr_t pa;
	unsigned long start, end;
	u_int i;
	int j;

	dev_priv = dev->dev_private;
	start = OFF_TO_IDX(dev_priv->mm.gtt_start);
	end = OFF_TO_IDX(dev_priv->mm.gtt_end);
	for (i = start; i < end; i++) {
		pa = intel_gtt_read_pte_paddr(i);
		for (j = 0; j < page_count; j++) {
			if (pa == VM_PAGE_TO_PHYS(ma[j])) {
				/*
				 * Report the page that matched, ma[j].
				 * Indexing ma[] with the PTE index i reads
				 * outside the array and names the wrong page.
				 */
				panic("Page %p in GTT pte index %u pte %x",
				    ma[j], i, intel_gtt_read_pte(i));
			}
		}
	}
}
#endif
1847 
1848 static void
i915_gem_object_put_pages_range(struct drm_i915_gem_object * obj,off_t start,off_t end)1849 i915_gem_object_put_pages_range(struct drm_i915_gem_object *obj,
1850     off_t start, off_t end)
1851 {
1852 	vm_object_t vm_obj;
1853 
1854 	vm_obj = obj->base.vm_obj;
1855 	VM_OBJECT_WLOCK(vm_obj);
1856 	i915_gem_object_put_pages_range_locked(obj,
1857 	    OFF_TO_IDX(trunc_page(start)), OFF_TO_IDX(round_page(end)));
1858 	VM_OBJECT_WUNLOCK(vm_obj);
1859 }
1860 
1861 static void
i915_gem_object_put_pages_gtt(struct drm_i915_gem_object * obj)1862 i915_gem_object_put_pages_gtt(struct drm_i915_gem_object *obj)
1863 {
1864 	vm_page_t page;
1865 	int page_count, i;
1866 
1867 	KASSERT(obj->madv != I915_MADV_PURGED_INTERNAL, ("Purged object"));
1868 
1869 	if (obj->tiling_mode != I915_TILING_NONE)
1870 		i915_gem_object_save_bit_17_swizzle(obj);
1871 	if (obj->madv == I915_MADV_DONTNEED)
1872 		obj->dirty = 0;
1873 	page_count = obj->base.size / PAGE_SIZE;
1874 	VM_OBJECT_WLOCK(obj->base.vm_obj);
1875 #if GEM_PARANOID_CHECK_GTT
1876 	i915_gem_assert_pages_not_mapped(obj->base.dev, obj->pages, page_count);
1877 #endif
1878 	for (i = 0; i < page_count; i++) {
1879 		page = obj->pages[i];
1880 		if (obj->dirty)
1881 			vm_page_dirty(page);
1882 		if (obj->madv == I915_MADV_WILLNEED)
1883 			vm_page_reference(page);
1884 		vm_page_lock(page);
1885 		vm_page_unwire(obj->pages[i], PQ_ACTIVE);
1886 		vm_page_unlock(page);
1887 		atomic_add_long(&i915_gem_wired_pages_cnt, -1);
1888 	}
1889 	VM_OBJECT_WUNLOCK(obj->base.vm_obj);
1890 	obj->dirty = 0;
1891 	free(obj->pages, DRM_I915_GEM);
1892 	obj->pages = NULL;
1893 }
1894 
1895 static int
i915_gpu_is_active(struct drm_device * dev)1896 i915_gpu_is_active(struct drm_device *dev)
1897 {
1898 	drm_i915_private_t *dev_priv = dev->dev_private;
1899 
1900 	return (!list_empty(&dev_priv->mm.flushing_list) ||
1901 	    !list_empty(&dev_priv->mm.active_list));
1902 }
1903 
1904 static void
i915_gem_lowmem(void * arg)1905 i915_gem_lowmem(void *arg)
1906 {
1907 	struct drm_device *dev;
1908 	struct drm_i915_private *dev_priv;
1909 	struct drm_i915_gem_object *obj, *next;
1910 	int cnt, cnt_fail, cnt_total;
1911 
1912 	dev = arg;
1913 	dev_priv = dev->dev_private;
1914 
1915 	if (!sx_try_xlock(&dev->dev_struct_lock))
1916 		return;
1917 
1918 	CTR0(KTR_DRM, "gem_lowmem");
1919 
1920 rescan:
1921 	/* first scan for clean buffers */
1922 	i915_gem_retire_requests(dev);
1923 
1924 	cnt_total = cnt_fail = cnt = 0;
1925 
1926 	list_for_each_entry_safe(obj, next, &dev_priv->mm.inactive_list,
1927 	    mm_list) {
1928 		if (i915_gem_object_is_purgeable(obj)) {
1929 			if (i915_gem_object_unbind(obj) != 0)
1930 				cnt_total++;
1931 		} else
1932 			cnt_total++;
1933 	}
1934 
1935 	/* second pass, evict/count anything still on the inactive list */
1936 	list_for_each_entry_safe(obj, next, &dev_priv->mm.inactive_list,
1937 	    mm_list) {
1938 		if (i915_gem_object_unbind(obj) == 0)
1939 			cnt++;
1940 		else
1941 			cnt_fail++;
1942 	}
1943 
1944 	if (cnt_fail > cnt_total / 100 && i915_gpu_is_active(dev)) {
1945 		/*
1946 		 * We are desperate for pages, so as a last resort, wait
1947 		 * for the GPU to finish and discard whatever we can.
1948 		 * This has a dramatic impact to reduce the number of
1949 		 * OOM-killer events whilst running the GPU aggressively.
1950 		 */
1951 		if (i915_gpu_idle(dev) == 0)
1952 			goto rescan;
1953 	}
1954 	DRM_UNLOCK(dev);
1955 }
1956 
1957 static int
i915_gem_object_get_pages_range(struct drm_i915_gem_object * obj,off_t start,off_t end)1958 i915_gem_object_get_pages_range(struct drm_i915_gem_object *obj,
1959     off_t start, off_t end)
1960 {
1961 	vm_object_t vm_obj;
1962 	vm_page_t page;
1963 	vm_pindex_t si, ei, i;
1964 	bool need_swizzle, fresh;
1965 
1966 	need_swizzle = i915_gem_object_needs_bit17_swizzle(obj) != 0;
1967 	vm_obj = obj->base.vm_obj;
1968 	si = OFF_TO_IDX(trunc_page(start));
1969 	ei = OFF_TO_IDX(round_page(end));
1970 	VM_OBJECT_WLOCK(vm_obj);
1971 	for (i = si; i < ei; i++) {
1972 		page = i915_gem_wire_page(vm_obj, i, &fresh);
1973 		if (page == NULL)
1974 			goto failed;
1975 		if (need_swizzle && fresh)
1976 			i915_gem_object_do_bit_17_swizzle_page(obj, page);
1977 	}
1978 	VM_OBJECT_WUNLOCK(vm_obj);
1979 	return (0);
1980 failed:
1981 	i915_gem_object_put_pages_range_locked(obj, si, i);
1982 	VM_OBJECT_WUNLOCK(vm_obj);
1983 	return (-EIO);
1984 }
1985 
1986 static int
i915_gem_object_get_pages_gtt(struct drm_i915_gem_object * obj,int flags)1987 i915_gem_object_get_pages_gtt(struct drm_i915_gem_object *obj,
1988     int flags)
1989 {
1990 	vm_object_t vm_obj;
1991 	vm_page_t page;
1992 	vm_pindex_t i, page_count;
1993 	int res;
1994 
1995 	KASSERT(obj->pages == NULL, ("Obj already has pages"));
1996 
1997 	page_count = OFF_TO_IDX(obj->base.size);
1998 	obj->pages = malloc(page_count * sizeof(vm_page_t), DRM_I915_GEM,
1999 	    M_WAITOK);
2000 	res = i915_gem_object_get_pages_range(obj, 0, obj->base.size);
2001 	if (res != 0) {
2002 		free(obj->pages, DRM_I915_GEM);
2003 		obj->pages = NULL;
2004 		return (res);
2005 	}
2006 	vm_obj = obj->base.vm_obj;
2007 	VM_OBJECT_WLOCK(vm_obj);
2008 	for (i = 0, page = vm_page_lookup(vm_obj, 0); i < page_count;
2009 	    i++, page = vm_page_next(page)) {
2010 		KASSERT(page->pindex == i, ("pindex %jx %jx",
2011 		    (uintmax_t)page->pindex, (uintmax_t)i));
2012 		obj->pages[i] = page;
2013 	}
2014 	VM_OBJECT_WUNLOCK(vm_obj);
2015 	return (0);
2016 }
2017 
2018 void
i915_gem_object_move_to_active(struct drm_i915_gem_object * obj,struct intel_ring_buffer * ring,uint32_t seqno)2019 i915_gem_object_move_to_active(struct drm_i915_gem_object *obj,
2020 			       struct intel_ring_buffer *ring, uint32_t seqno)
2021 {
2022 	struct drm_device *dev = obj->base.dev;
2023 	struct drm_i915_private *dev_priv = dev->dev_private;
2024 	struct drm_i915_fence_reg *reg;
2025 
2026 	KASSERT(ring != NULL, ("NULL ring"));
2027 	obj->ring = ring;
2028 
2029 	/* Add a reference if we're newly entering the active list. */
2030 	if (!obj->active) {
2031 		drm_gem_object_reference(&obj->base);
2032 		obj->active = 1;
2033 	}
2034 
2035 	/* Move from whatever list we were on to the tail of execution. */
2036 	list_move_tail(&obj->mm_list, &dev_priv->mm.active_list);
2037 	list_move_tail(&obj->ring_list, &ring->active_list);
2038 
2039 	obj->last_rendering_seqno = seqno;
2040 	if (obj->fenced_gpu_access) {
2041 		obj->last_fenced_seqno = seqno;
2042 
2043 		/* Bump MRU to take account of the delayed flush */
2044 		if (obj->fence_reg != I915_FENCE_REG_NONE) {
2045 			reg = &dev_priv->fence_regs[obj->fence_reg];
2046 			list_move_tail(&reg->lru_list,
2047 				       &dev_priv->mm.fence_list);
2048 		}
2049 	}
2050 }
2051 
2052 static void
i915_gem_object_move_off_active(struct drm_i915_gem_object * obj)2053 i915_gem_object_move_off_active(struct drm_i915_gem_object *obj)
2054 {
2055 	list_del_init(&obj->ring_list);
2056 	obj->last_rendering_seqno = 0;
2057 	obj->last_fenced_seqno = 0;
2058 }
2059 
2060 static void
i915_gem_object_move_to_flushing(struct drm_i915_gem_object * obj)2061 i915_gem_object_move_to_flushing(struct drm_i915_gem_object *obj)
2062 {
2063 	struct drm_device *dev = obj->base.dev;
2064 	drm_i915_private_t *dev_priv = dev->dev_private;
2065 
2066 	KASSERT(obj->active, ("Object not active"));
2067 	list_move_tail(&obj->mm_list, &dev_priv->mm.flushing_list);
2068 
2069 	i915_gem_object_move_off_active(obj);
2070 }
2071 
2072 static void
i915_gem_object_move_to_inactive(struct drm_i915_gem_object * obj)2073 i915_gem_object_move_to_inactive(struct drm_i915_gem_object *obj)
2074 {
2075 	struct drm_device *dev = obj->base.dev;
2076 	struct drm_i915_private *dev_priv = dev->dev_private;
2077 
2078 	list_move_tail(&obj->mm_list, &dev_priv->mm.inactive_list);
2079 
2080 	KASSERT(list_empty(&obj->gpu_write_list), ("On gpu_write_list"));
2081 	KASSERT(obj->active, ("Object not active"));
2082 	obj->ring = NULL;
2083 
2084 	i915_gem_object_move_off_active(obj);
2085 	obj->fenced_gpu_access = false;
2086 
2087 	obj->active = 0;
2088 	obj->pending_gpu_write = false;
2089 	drm_gem_object_unreference(&obj->base);
2090 
2091 #if 1
2092 	KIB_NOTYET();
2093 #else
2094 	WARN_ON(i915_verify_lists(dev));
2095 #endif
2096 }
2097 
2098 static u32
i915_gem_get_seqno(struct drm_device * dev)2099 i915_gem_get_seqno(struct drm_device *dev)
2100 {
2101 	drm_i915_private_t *dev_priv = dev->dev_private;
2102 	u32 seqno = dev_priv->next_seqno;
2103 
2104 	/* reserve 0 for non-seqno */
2105 	if (++dev_priv->next_seqno == 0)
2106 		dev_priv->next_seqno = 1;
2107 
2108 	return seqno;
2109 }
2110 
2111 u32
i915_gem_next_request_seqno(struct intel_ring_buffer * ring)2112 i915_gem_next_request_seqno(struct intel_ring_buffer *ring)
2113 {
2114 	if (ring->outstanding_lazy_request == 0)
2115 		ring->outstanding_lazy_request = i915_gem_get_seqno(ring->dev);
2116 
2117 	return ring->outstanding_lazy_request;
2118 }
2119 
2120 int
i915_add_request(struct intel_ring_buffer * ring,struct drm_file * file,struct drm_i915_gem_request * request)2121 i915_add_request(struct intel_ring_buffer *ring,
2122 		 struct drm_file *file,
2123 		 struct drm_i915_gem_request *request)
2124 {
2125 	drm_i915_private_t *dev_priv = ring->dev->dev_private;
2126 	struct drm_i915_file_private *file_priv;
2127 	uint32_t seqno;
2128 	u32 request_ring_position;
2129 	int was_empty;
2130 	int ret;
2131 
2132 	KASSERT(request != NULL, ("NULL request in add"));
2133 	DRM_LOCK_ASSERT(ring->dev);
2134 
2135 	seqno = i915_gem_next_request_seqno(ring);
2136 	request_ring_position = intel_ring_get_tail(ring);
2137 
2138 	ret = ring->add_request(ring, &seqno);
2139 	if (ret != 0)
2140 	    return ret;
2141 
2142 	CTR2(KTR_DRM, "request_add %s %d", ring->name, seqno);
2143 
2144 	request->seqno = seqno;
2145 	request->ring = ring;
2146 	request->tail = request_ring_position;
2147 	request->emitted_jiffies = ticks;
2148 	was_empty = list_empty(&ring->request_list);
2149 	list_add_tail(&request->list, &ring->request_list);
2150 
2151 	if (file) {
2152 		file_priv = file->driver_priv;
2153 
2154 		mtx_lock(&file_priv->mm.lck);
2155 		request->file_priv = file_priv;
2156 		list_add_tail(&request->client_list,
2157 			      &file_priv->mm.request_list);
2158 		mtx_unlock(&file_priv->mm.lck);
2159 	}
2160 
2161 	ring->outstanding_lazy_request = 0;
2162 
2163 	if (!dev_priv->mm.suspended) {
2164 		if (i915_enable_hangcheck) {
2165 			callout_schedule(&dev_priv->hangcheck_timer,
2166 			    DRM_I915_HANGCHECK_PERIOD);
2167 		}
2168 		if (was_empty)
2169 			taskqueue_enqueue_timeout(dev_priv->tq,
2170 			    &dev_priv->mm.retire_task, hz);
2171 	}
2172 
2173 	return 0;
2174 }
2175 
2176 static inline void
i915_gem_request_remove_from_client(struct drm_i915_gem_request * request)2177 i915_gem_request_remove_from_client(struct drm_i915_gem_request *request)
2178 {
2179 	struct drm_i915_file_private *file_priv = request->file_priv;
2180 
2181 	if (!file_priv)
2182 		return;
2183 
2184 	DRM_LOCK_ASSERT(request->ring->dev);
2185 
2186 	mtx_lock(&file_priv->mm.lck);
2187 	if (request->file_priv) {
2188 		list_del(&request->client_list);
2189 		request->file_priv = NULL;
2190 	}
2191 	mtx_unlock(&file_priv->mm.lck);
2192 }
2193 
i915_gem_reset_ring_lists(struct drm_i915_private * dev_priv,struct intel_ring_buffer * ring)2194 static void i915_gem_reset_ring_lists(struct drm_i915_private *dev_priv,
2195 				      struct intel_ring_buffer *ring)
2196 {
2197 	if (ring->dev != NULL)
2198 		DRM_LOCK_ASSERT(ring->dev);
2199 
2200 	while (!list_empty(&ring->request_list)) {
2201 		struct drm_i915_gem_request *request;
2202 
2203 		request = list_first_entry(&ring->request_list,
2204 					   struct drm_i915_gem_request,
2205 					   list);
2206 
2207 		list_del(&request->list);
2208 		i915_gem_request_remove_from_client(request);
2209 		free(request, DRM_I915_GEM);
2210 	}
2211 
2212 	while (!list_empty(&ring->active_list)) {
2213 		struct drm_i915_gem_object *obj;
2214 
2215 		obj = list_first_entry(&ring->active_list,
2216 				       struct drm_i915_gem_object,
2217 				       ring_list);
2218 
2219 		obj->base.write_domain = 0;
2220 		list_del_init(&obj->gpu_write_list);
2221 		i915_gem_object_move_to_inactive(obj);
2222 	}
2223 }
2224 
i915_gem_reset_fences(struct drm_device * dev)2225 static void i915_gem_reset_fences(struct drm_device *dev)
2226 {
2227 	struct drm_i915_private *dev_priv = dev->dev_private;
2228 	int i;
2229 
2230 	for (i = 0; i < dev_priv->num_fence_regs; i++) {
2231 		struct drm_i915_fence_reg *reg = &dev_priv->fence_regs[i];
2232 
2233 		i915_gem_write_fence(dev, i, NULL);
2234 
2235 		if (reg->obj)
2236 			i915_gem_object_fence_lost(reg->obj);
2237 
2238 		reg->pin_count = 0;
2239 		reg->obj = NULL;
2240 		INIT_LIST_HEAD(&reg->lru_list);
2241 	}
2242 
2243 	INIT_LIST_HEAD(&dev_priv->mm.fence_list);
2244 }
2245 
i915_gem_reset(struct drm_device * dev)2246 void i915_gem_reset(struct drm_device *dev)
2247 {
2248 	struct drm_i915_private *dev_priv = dev->dev_private;
2249 	struct drm_i915_gem_object *obj;
2250 	struct intel_ring_buffer *ring;
2251 	int i;
2252 
2253 	for_each_ring(ring, dev_priv, i)
2254 		i915_gem_reset_ring_lists(dev_priv, ring);
2255 
2256 	/* Remove anything from the flushing lists. The GPU cache is likely
2257 	 * to be lost on reset along with the data, so simply move the
2258 	 * lost bo to the inactive list.
2259 	 */
2260 	while (!list_empty(&dev_priv->mm.flushing_list)) {
2261 		obj = list_first_entry(&dev_priv->mm.flushing_list,
2262 				      struct drm_i915_gem_object,
2263 				      mm_list);
2264 
2265 		obj->base.write_domain = 0;
2266 		list_del_init(&obj->gpu_write_list);
2267 		i915_gem_object_move_to_inactive(obj);
2268 	}
2269 
2270 	/* Move everything out of the GPU domains to ensure we do any
2271 	 * necessary invalidation upon reuse.
2272 	 */
2273 	list_for_each_entry(obj, &dev_priv->mm.inactive_list, mm_list) {
2274 		obj->base.read_domains &= ~I915_GEM_GPU_DOMAINS;
2275 	}
2276 
2277 	/* The fence registers are invalidated so clear them out */
2278 	i915_gem_reset_fences(dev);
2279 }
2280 
2281 /**
2282  * This function clears the request list as sequence numbers are passed.
2283  */
2284 void
i915_gem_retire_requests_ring(struct intel_ring_buffer * ring)2285 i915_gem_retire_requests_ring(struct intel_ring_buffer *ring)
2286 {
2287 	uint32_t seqno;
2288 	int i;
2289 
2290 	if (list_empty(&ring->request_list))
2291 		return;
2292 
2293 	seqno = ring->get_seqno(ring);
2294 	CTR2(KTR_DRM, "retire_request_ring %s %d", ring->name, seqno);
2295 
2296 	for (i = 0; i < ARRAY_SIZE(ring->sync_seqno); i++)
2297 		if (seqno >= ring->sync_seqno[i])
2298 			ring->sync_seqno[i] = 0;
2299 
2300 	while (!list_empty(&ring->request_list)) {
2301 		struct drm_i915_gem_request *request;
2302 
2303 		request = list_first_entry(&ring->request_list,
2304 					   struct drm_i915_gem_request,
2305 					   list);
2306 
2307 		if (!i915_seqno_passed(seqno, request->seqno))
2308 			break;
2309 
2310 		CTR2(KTR_DRM, "retire_request_seqno_passed %s %d",
2311 		    ring->name, seqno);
2312 		ring->last_retired_head = request->tail;
2313 
2314 		list_del(&request->list);
2315 		i915_gem_request_remove_from_client(request);
2316 		free(request, DRM_I915_GEM);
2317 	}
2318 
2319 	/* Move any buffers on the active list that are no longer referenced
2320 	 * by the ringbuffer to the flushing/inactive lists as appropriate.
2321 	 */
2322 	while (!list_empty(&ring->active_list)) {
2323 		struct drm_i915_gem_object *obj;
2324 
2325 		obj = list_first_entry(&ring->active_list,
2326 				      struct drm_i915_gem_object,
2327 				      ring_list);
2328 
2329 		if (!i915_seqno_passed(seqno, obj->last_rendering_seqno))
2330 			break;
2331 
2332 		if (obj->base.write_domain != 0)
2333 			i915_gem_object_move_to_flushing(obj);
2334 		else
2335 			i915_gem_object_move_to_inactive(obj);
2336 	}
2337 
2338 	if (ring->trace_irq_seqno &&
2339 	    i915_seqno_passed(seqno, ring->trace_irq_seqno)) {
2340 		struct drm_i915_private *dev_priv = ring->dev->dev_private;
2341 		mtx_lock(&dev_priv->irq_lock);
2342 		ring->irq_put(ring);
2343 		mtx_unlock(&dev_priv->irq_lock);
2344 		ring->trace_irq_seqno = 0;
2345 	}
2346 }
2347 
2348 void
i915_gem_retire_requests(struct drm_device * dev)2349 i915_gem_retire_requests(struct drm_device *dev)
2350 {
2351 	drm_i915_private_t *dev_priv = dev->dev_private;
2352 	struct intel_ring_buffer *ring;
2353 	int i;
2354 
2355 	for_each_ring(ring, dev_priv, i)
2356 		i915_gem_retire_requests_ring(ring);
2357 }
2358 
2359 static void
i915_gem_process_flushing_list(struct intel_ring_buffer * ring,uint32_t flush_domains)2360 i915_gem_process_flushing_list(struct intel_ring_buffer *ring,
2361     uint32_t flush_domains)
2362 {
2363 	struct drm_i915_gem_object *obj, *next;
2364 	uint32_t old_write_domain;
2365 
2366 	list_for_each_entry_safe(obj, next, &ring->gpu_write_list,
2367 	    gpu_write_list) {
2368 		if (obj->base.write_domain & flush_domains) {
2369 			old_write_domain = obj->base.write_domain;
2370 			obj->base.write_domain = 0;
2371 			list_del_init(&obj->gpu_write_list);
2372 			i915_gem_object_move_to_active(obj, ring,
2373 			    i915_gem_next_request_seqno(ring));
2374 
2375 	CTR3(KTR_DRM, "object_change_domain process_flush %p %x %x",
2376 			    obj, obj->base.read_domains, old_write_domain);
2377 		}
2378 	}
2379 }
2380 
2381 int
i915_gem_flush_ring(struct intel_ring_buffer * ring,uint32_t invalidate_domains,uint32_t flush_domains)2382 i915_gem_flush_ring(struct intel_ring_buffer *ring, uint32_t invalidate_domains,
2383     uint32_t flush_domains)
2384 {
2385 	int ret;
2386 
2387 	if (((invalidate_domains | flush_domains) & I915_GEM_GPU_DOMAINS) == 0)
2388 		return 0;
2389 
2390 	CTR3(KTR_DRM, "ring_flush %s %x %x", ring->name, invalidate_domains,
2391 	    flush_domains);
2392 	ret = ring->flush(ring, invalidate_domains, flush_domains);
2393 	if (ret)
2394 		return ret;
2395 
2396 	if (flush_domains & I915_GEM_GPU_DOMAINS)
2397 		i915_gem_process_flushing_list(ring, flush_domains);
2398 	return 0;
2399 }
2400 
2401 static void
i915_gem_retire_task_handler(void * arg,int pending)2402 i915_gem_retire_task_handler(void *arg, int pending)
2403 {
2404 	drm_i915_private_t *dev_priv;
2405 	struct drm_device *dev;
2406 	struct intel_ring_buffer *ring;
2407 	bool idle;
2408 	int i;
2409 
2410 	dev_priv = arg;
2411 	dev = dev_priv->dev;
2412 
2413 	/* Come back later if the device is busy... */
2414 	if (!sx_try_xlock(&dev->dev_struct_lock)) {
2415 		taskqueue_enqueue_timeout(dev_priv->tq,
2416 		    &dev_priv->mm.retire_task, hz);
2417 		return;
2418 	}
2419 
2420 	CTR0(KTR_DRM, "retire_task");
2421 
2422 	i915_gem_retire_requests(dev);
2423 
2424 	/* Send a periodic flush down the ring so we don't hold onto GEM
2425 	 * objects indefinitely.
2426 	 */
2427 	idle = true;
2428 	for_each_ring(ring, dev_priv, i) {
2429 		struct intel_ring_buffer *ring = &dev_priv->rings[i];
2430 
2431 		if (!list_empty(&ring->gpu_write_list)) {
2432 			struct drm_i915_gem_request *request;
2433 			int ret;
2434 
2435 			ret = i915_gem_flush_ring(ring,
2436 						  0, I915_GEM_GPU_DOMAINS);
2437 			request = malloc(sizeof(*request), DRM_I915_GEM,
2438 			    M_WAITOK | M_ZERO);
2439 			if (ret || request == NULL ||
2440 			    i915_add_request(ring, NULL, request))
2441 				free(request, DRM_I915_GEM);
2442 		}
2443 
2444 		idle &= list_empty(&ring->request_list);
2445 	}
2446 
2447 	if (!dev_priv->mm.suspended && !idle)
2448 		taskqueue_enqueue_timeout(dev_priv->tq,
2449 		    &dev_priv->mm.retire_task, hz);
2450 
2451 	DRM_UNLOCK(dev);
2452 }
2453 
2454 int
i915_gem_object_sync(struct drm_i915_gem_object * obj,struct intel_ring_buffer * to)2455 i915_gem_object_sync(struct drm_i915_gem_object *obj,
2456 		     struct intel_ring_buffer *to)
2457 {
2458 	struct intel_ring_buffer *from = obj->ring;
2459 	u32 seqno;
2460 	int ret, idx;
2461 
2462 	if (from == NULL || to == from)
2463 		return 0;
2464 
2465 	if (to == NULL || !i915_semaphore_is_enabled(obj->base.dev))
2466 		return i915_gem_object_wait_rendering(obj);
2467 
2468 	idx = intel_ring_sync_index(from, to);
2469 
2470 	seqno = obj->last_rendering_seqno;
2471 	if (seqno <= from->sync_seqno[idx])
2472 		return 0;
2473 
2474 	if (seqno == from->outstanding_lazy_request) {
2475 		struct drm_i915_gem_request *request;
2476 
2477 		request = malloc(sizeof(*request), DRM_I915_GEM,
2478 		    M_WAITOK | M_ZERO);
2479 		ret = i915_add_request(from, NULL, request);
2480 		if (ret) {
2481 			free(request, DRM_I915_GEM);
2482 			return ret;
2483 		}
2484 		seqno = request->seqno;
2485 	}
2486 
2487 
2488 	ret = to->sync_to(to, from, seqno);
2489 	if (!ret)
2490 		from->sync_seqno[idx] = seqno;
2491 
2492 	return ret;
2493 }
2494 
i915_gem_object_finish_gtt(struct drm_i915_gem_object * obj)2495 static void i915_gem_object_finish_gtt(struct drm_i915_gem_object *obj)
2496 {
2497 	u32 old_write_domain, old_read_domains;
2498 
2499 	/* Act a barrier for all accesses through the GTT */
2500 	mb();
2501 
2502 	/* Force a pagefault for domain tracking on next user access */
2503 	i915_gem_release_mmap(obj);
2504 
2505 	if ((obj->base.read_domains & I915_GEM_DOMAIN_GTT) == 0)
2506 		return;
2507 
2508 	old_read_domains = obj->base.read_domains;
2509 	old_write_domain = obj->base.write_domain;
2510 
2511 	obj->base.read_domains &= ~I915_GEM_DOMAIN_GTT;
2512 	obj->base.write_domain &= ~I915_GEM_DOMAIN_GTT;
2513 
2514 	CTR3(KTR_DRM, "object_change_domain finish gtt %p %x %x",
2515 	    obj, old_read_domains, old_write_domain);
2516 }
2517 
2518 /**
2519  * Unbinds an object from the GTT aperture.
2520  */
2521 int
i915_gem_object_unbind(struct drm_i915_gem_object * obj)2522 i915_gem_object_unbind(struct drm_i915_gem_object *obj)
2523 {
2524 	drm_i915_private_t *dev_priv = obj->base.dev->dev_private;
2525 	int ret = 0;
2526 
2527 	if (obj->gtt_space == NULL)
2528 		return 0;
2529 
2530 	if (obj->pin_count)
2531 		return -EINVAL;
2532 
2533 	ret = i915_gem_object_finish_gpu(obj);
2534 	if (ret == -ERESTARTSYS || ret == -EINTR)
2535 		return ret;
2536 
2537 	i915_gem_object_finish_gtt(obj);
2538 
2539 	if (ret == 0)
2540 		ret = i915_gem_object_set_to_cpu_domain(obj, 1);
2541 	if (ret == -ERESTARTSYS || ret == -EINTR)
2542 		return ret;
2543 	if (ret != 0) {
2544 		i915_gem_clflush_object(obj);
2545 		obj->base.read_domains = obj->base.write_domain =
2546 		    I915_GEM_DOMAIN_CPU;
2547 	}
2548 
2549 	/* release the fence reg _after_ flushing */
2550 	ret = i915_gem_object_put_fence(obj);
2551 	if (ret)
2552 		return ret;
2553 
2554 	if (obj->has_global_gtt_mapping)
2555 		i915_gem_gtt_unbind_object(obj);
2556 	if (obj->has_aliasing_ppgtt_mapping) {
2557 		i915_ppgtt_unbind_object(dev_priv->mm.aliasing_ppgtt, obj);
2558 		obj->has_aliasing_ppgtt_mapping = 0;
2559 	}
2560 	i915_gem_gtt_finish_object(obj);
2561 
2562 	i915_gem_object_put_pages_gtt(obj);
2563 
2564 	list_del_init(&obj->gtt_list);
2565 	list_del_init(&obj->mm_list);
2566 	obj->map_and_fenceable = true;
2567 
2568 	drm_mm_put_block(obj->gtt_space);
2569 	obj->gtt_space = NULL;
2570 	obj->gtt_offset = 0;
2571 
2572 	if (i915_gem_object_is_purgeable(obj))
2573 		i915_gem_object_truncate(obj);
2574 	CTR1(KTR_DRM, "object_unbind %p", obj);
2575 
2576 	return ret;
2577 }
2578 
2579 static int
i915_ring_idle(struct intel_ring_buffer * ring)2580 i915_ring_idle(struct intel_ring_buffer *ring)
2581 {
2582 	int ret;
2583 
2584 	if (list_empty(&ring->gpu_write_list) && list_empty(&ring->active_list))
2585 		return 0;
2586 
2587 	if (!list_empty(&ring->gpu_write_list)) {
2588 		ret = i915_gem_flush_ring(ring, I915_GEM_GPU_DOMAINS,
2589 		    I915_GEM_GPU_DOMAINS);
2590 		if (ret != 0)
2591 			return ret;
2592 	}
2593 
2594 	return (i915_wait_request(ring, i915_gem_next_request_seqno(ring)));
2595 }
2596 
i915_gpu_idle(struct drm_device * dev)2597 int i915_gpu_idle(struct drm_device *dev)
2598 {
2599 	drm_i915_private_t *dev_priv = dev->dev_private;
2600 	struct intel_ring_buffer *ring;
2601 	int ret, i;
2602 
2603 	/* Flush everything onto the inactive list. */
2604 	for_each_ring(ring, dev_priv, i) {
2605 		ret = i915_switch_context(ring, NULL, DEFAULT_CONTEXT_ID);
2606 		if (ret)
2607 			return ret;
2608 
2609 		ret = i915_ring_idle(ring);
2610 		if (ret)
2611 			return ret;
2612 
2613 		/* Is the device fubar? */
2614 		if (!list_empty(&ring->gpu_write_list))
2615 			return -EBUSY;
2616 	}
2617 
2618 	return 0;
2619 }
2620 
sandybridge_write_fence_reg(struct drm_device * dev,int reg,struct drm_i915_gem_object * obj)2621 static void sandybridge_write_fence_reg(struct drm_device *dev, int reg,
2622 					struct drm_i915_gem_object *obj)
2623 {
2624 	drm_i915_private_t *dev_priv = dev->dev_private;
2625 	uint64_t val;
2626 
2627 	if (obj) {
2628 		u32 size = obj->gtt_space->size;
2629 
2630 		val = (uint64_t)((obj->gtt_offset + size - 4096) &
2631 				 0xfffff000) << 32;
2632 		val |= obj->gtt_offset & 0xfffff000;
2633 		val |= (uint64_t)((obj->stride / 128) - 1) <<
2634 			SANDYBRIDGE_FENCE_PITCH_SHIFT;
2635 
2636 		if (obj->tiling_mode == I915_TILING_Y)
2637 			val |= 1 << I965_FENCE_TILING_Y_SHIFT;
2638 		val |= I965_FENCE_REG_VALID;
2639 	} else
2640 		val = 0;
2641 
2642 	I915_WRITE64(FENCE_REG_SANDYBRIDGE_0 + reg * 8, val);
2643 	POSTING_READ(FENCE_REG_SANDYBRIDGE_0 + reg * 8);
2644 }
2645 
i965_write_fence_reg(struct drm_device * dev,int reg,struct drm_i915_gem_object * obj)2646 static void i965_write_fence_reg(struct drm_device *dev, int reg,
2647 				 struct drm_i915_gem_object *obj)
2648 {
2649 	drm_i915_private_t *dev_priv = dev->dev_private;
2650 	uint64_t val;
2651 
2652 	if (obj) {
2653 		u32 size = obj->gtt_space->size;
2654 
2655 		val = (uint64_t)((obj->gtt_offset + size - 4096) &
2656 				 0xfffff000) << 32;
2657 		val |= obj->gtt_offset & 0xfffff000;
2658 		val |= ((obj->stride / 128) - 1) << I965_FENCE_PITCH_SHIFT;
2659 		if (obj->tiling_mode == I915_TILING_Y)
2660 			val |= 1 << I965_FENCE_TILING_Y_SHIFT;
2661 		val |= I965_FENCE_REG_VALID;
2662 	} else
2663 		val = 0;
2664 
2665 	I915_WRITE64(FENCE_REG_965_0 + reg * 8, val);
2666 	POSTING_READ(FENCE_REG_965_0 + reg * 8);
2667 }
2668 
i915_write_fence_reg(struct drm_device * dev,int reg,struct drm_i915_gem_object * obj)2669 static void i915_write_fence_reg(struct drm_device *dev, int reg,
2670 				 struct drm_i915_gem_object *obj)
2671 {
2672 	drm_i915_private_t *dev_priv = dev->dev_private;
2673 	u32 val;
2674 
2675 	if (obj) {
2676 		u32 size = obj->gtt_space->size;
2677 		int pitch_val;
2678 		int tile_width;
2679 
2680 		if ((obj->gtt_offset & ~I915_FENCE_START_MASK) ||
2681 		     (size & -size) != size ||
2682 		     (obj->gtt_offset & (size - 1)))
2683 			printf(
2684 		     "object 0x%08x [fenceable? %d] not 1M or pot-size (0x%08x) aligned\n",
2685 		     obj->gtt_offset, obj->map_and_fenceable, size);
2686 
2687 		if (obj->tiling_mode == I915_TILING_Y && HAS_128_BYTE_Y_TILING(dev))
2688 			tile_width = 128;
2689 		else
2690 			tile_width = 512;
2691 
2692 		/* Note: pitch better be a power of two tile widths */
2693 		pitch_val = obj->stride / tile_width;
2694 		pitch_val = ffs(pitch_val) - 1;
2695 
2696 		val = obj->gtt_offset;
2697 		if (obj->tiling_mode == I915_TILING_Y)
2698 			val |= 1 << I830_FENCE_TILING_Y_SHIFT;
2699 		val |= I915_FENCE_SIZE_BITS(size);
2700 		val |= pitch_val << I830_FENCE_PITCH_SHIFT;
2701 		val |= I830_FENCE_REG_VALID;
2702 	} else
2703 		val = 0;
2704 
2705 	if (reg < 8)
2706 		reg = FENCE_REG_830_0 + reg * 4;
2707 	else
2708 		reg = FENCE_REG_945_8 + (reg - 8) * 4;
2709 
2710 	I915_WRITE(reg, val);
2711 	POSTING_READ(reg);
2712 }
2713 
i830_write_fence_reg(struct drm_device * dev,int reg,struct drm_i915_gem_object * obj)2714 static void i830_write_fence_reg(struct drm_device *dev, int reg,
2715 				struct drm_i915_gem_object *obj)
2716 {
2717 	drm_i915_private_t *dev_priv = dev->dev_private;
2718 	uint32_t val;
2719 
2720 	if (obj) {
2721 		u32 size = obj->gtt_space->size;
2722 		uint32_t pitch_val;
2723 
2724 		if ((obj->gtt_offset & ~I830_FENCE_START_MASK) ||
2725 		     (size & -size) != size ||
2726 		     (obj->gtt_offset & (size - 1)))
2727 		    printf(
2728 		     "object 0x%08x not 512K or pot-size 0x%08x aligned\n",
2729 		     obj->gtt_offset, size);
2730 
2731 		pitch_val = obj->stride / 128;
2732 		pitch_val = ffs(pitch_val) - 1;
2733 
2734 		val = obj->gtt_offset;
2735 		if (obj->tiling_mode == I915_TILING_Y)
2736 			val |= 1 << I830_FENCE_TILING_Y_SHIFT;
2737 		val |= I830_FENCE_SIZE_BITS(size);
2738 		val |= pitch_val << I830_FENCE_PITCH_SHIFT;
2739 		val |= I830_FENCE_REG_VALID;
2740 	} else
2741 		val = 0;
2742 
2743 	I915_WRITE(FENCE_REG_830_0 + reg * 4, val);
2744 	POSTING_READ(FENCE_REG_830_0 + reg * 4);
2745 }
2746 
i915_gem_write_fence(struct drm_device * dev,int reg,struct drm_i915_gem_object * obj)2747 static void i915_gem_write_fence(struct drm_device *dev, int reg,
2748 				 struct drm_i915_gem_object *obj)
2749 {
2750 	switch (INTEL_INFO(dev)->gen) {
2751 	case 7:
2752 	case 6: sandybridge_write_fence_reg(dev, reg, obj); break;
2753 	case 5:
2754 	case 4: i965_write_fence_reg(dev, reg, obj); break;
2755 	case 3: i915_write_fence_reg(dev, reg, obj); break;
2756 	case 2: i830_write_fence_reg(dev, reg, obj); break;
2757 	default: break;
2758 	}
2759 }
2760 
fence_number(struct drm_i915_private * dev_priv,struct drm_i915_fence_reg * fence)2761 static inline int fence_number(struct drm_i915_private *dev_priv,
2762 			       struct drm_i915_fence_reg *fence)
2763 {
2764 	return fence - dev_priv->fence_regs;
2765 }
2766 
/*
 * Attach @fence to @obj or detach it, and program the hardware to match.
 *
 * When @enable is true the register is written for @obj, the two are
 * linked and the fence is moved to the tail of the fence LRU list.  When
 * it is false the register is cleared, the link is broken and the fence is
 * removed from the LRU list.
 */
static void i915_gem_object_update_fence(struct drm_i915_gem_object *obj,
					 struct drm_i915_fence_reg *fence,
					 bool enable)
{
	struct drm_device *dev = obj->base.dev;
	struct drm_i915_private *dev_priv = dev->dev_private;
	int idx = fence_number(dev_priv, fence);

	if (!enable) {
		i915_gem_write_fence(dev, idx, NULL);

		obj->fence_reg = I915_FENCE_REG_NONE;
		fence->obj = NULL;
		list_del_init(&fence->lru_list);
		return;
	}

	i915_gem_write_fence(dev, idx, obj);

	obj->fence_reg = idx;
	fence->obj = obj;
	list_move_tail(&fence->lru_list, &dev_priv->mm.fence_list);
}
2787 
2788 static int
i915_gem_object_flush_fence(struct drm_i915_gem_object * obj)2789 i915_gem_object_flush_fence(struct drm_i915_gem_object *obj)
2790 {
2791 	int ret;
2792 
2793 	if (obj->fenced_gpu_access) {
2794 		if (obj->base.write_domain & I915_GEM_GPU_DOMAINS) {
2795 			ret = i915_gem_flush_ring(obj->ring,
2796 						  0, obj->base.write_domain);
2797 			if (ret)
2798 				return ret;
2799 		}
2800 
2801 		obj->fenced_gpu_access = false;
2802 	}
2803 
2804 	if (obj->last_fenced_seqno) {
2805 		ret = i915_wait_request(obj->ring,
2806 					obj->last_fenced_seqno);
2807 		if (ret)
2808 			return ret;
2809 
2810 		obj->last_fenced_seqno = 0;
2811 	}
2812 
2813 	/* Ensure that all CPU reads are completed before installing a fence
2814 	 * and all writes before removing the fence.
2815 	 */
2816 	if (obj->base.read_domains & I915_GEM_DOMAIN_GTT)
2817 		mb();
2818 
2819 	return 0;
2820 }
2821 
2822 int
i915_gem_object_put_fence(struct drm_i915_gem_object * obj)2823 i915_gem_object_put_fence(struct drm_i915_gem_object *obj)
2824 {
2825 	struct drm_i915_private *dev_priv = obj->base.dev->dev_private;
2826 	int ret;
2827 
2828 	ret = i915_gem_object_flush_fence(obj);
2829 	if (ret)
2830 		return ret;
2831 
2832 	if (obj->fence_reg == I915_FENCE_REG_NONE)
2833 		return 0;
2834 
2835 	i915_gem_object_update_fence(obj,
2836 				     &dev_priv->fence_regs[obj->fence_reg],
2837 				     false);
2838 	i915_gem_object_fence_lost(obj);
2839 
2840 	return 0;
2841 }
2842 
2843 static struct drm_i915_fence_reg *
i915_find_fence_reg(struct drm_device * dev)2844 i915_find_fence_reg(struct drm_device *dev)
2845 {
2846 	struct drm_i915_private *dev_priv = dev->dev_private;
2847 	struct drm_i915_fence_reg *reg, *avail;
2848 	int i;
2849 
2850 	/* First try to find a free reg */
2851 	avail = NULL;
2852 	for (i = dev_priv->fence_reg_start; i < dev_priv->num_fence_regs; i++) {
2853 		reg = &dev_priv->fence_regs[i];
2854 		if (!reg->obj)
2855 			return reg;
2856 
2857 		if (!reg->pin_count)
2858 			avail = reg;
2859 	}
2860 
2861 	if (avail == NULL)
2862 		return NULL;
2863 
2864 	/* None available, try to steal one or wait for a user to finish */
2865 	list_for_each_entry(reg, &dev_priv->mm.fence_list, lru_list) {
2866 		if (reg->pin_count)
2867 			continue;
2868 
2869 		return reg;
2870 	}
2871 
2872 	return NULL;
2873 }
2874 
2875 /**
2876  * i915_gem_object_get_fence - set up fencing for an object
2877  * @obj: object to map through a fence reg
2878  *
2879  * When mapping objects through the GTT, userspace wants to be able to write
2880  * to them without having to worry about swizzling if the object is tiled.
2881  * This function walks the fence regs looking for a free one for @obj,
2882  * stealing one if it can't find any.
2883  *
2884  * It then sets up the reg based on the object's properties: address, pitch
2885  * and tiling format.
2886  *
2887  * For an untiled surface, this removes any existing fence.
2888  */
2889 int
i915_gem_object_get_fence(struct drm_i915_gem_object * obj)2890 i915_gem_object_get_fence(struct drm_i915_gem_object *obj)
2891 {
2892 	struct drm_device *dev = obj->base.dev;
2893 	struct drm_i915_private *dev_priv = dev->dev_private;
2894 	bool enable = obj->tiling_mode != I915_TILING_NONE;
2895 	struct drm_i915_fence_reg *reg;
2896 	int ret;
2897 
2898 	/* Have we updated the tiling parameters upon the object and so
2899 	 * will need to serialise the write to the associated fence register?
2900 	 */
2901 	if (obj->fence_dirty) {
2902 		ret = i915_gem_object_flush_fence(obj);
2903 		if (ret)
2904 			return ret;
2905 	}
2906 
2907 	/* Just update our place in the LRU if our fence is getting reused. */
2908 	if (obj->fence_reg != I915_FENCE_REG_NONE) {
2909 		reg = &dev_priv->fence_regs[obj->fence_reg];
2910 		if (!obj->fence_dirty) {
2911 			list_move_tail(&reg->lru_list,
2912 				       &dev_priv->mm.fence_list);
2913 			return 0;
2914 		}
2915 	} else if (enable) {
2916 		reg = i915_find_fence_reg(dev);
2917 		if (reg == NULL)
2918 			return -EDEADLK;
2919 
2920 		if (reg->obj) {
2921 			struct drm_i915_gem_object *old = reg->obj;
2922 
2923 			ret = i915_gem_object_flush_fence(old);
2924 			if (ret)
2925 				return ret;
2926 
2927 			i915_gem_object_fence_lost(old);
2928 		}
2929 	} else
2930 		return 0;
2931 
2932 	i915_gem_object_update_fence(obj, reg, enable);
2933 	obj->fence_dirty = false;
2934 
2935 	return 0;
2936 }
2937 
2938 /**
2939  * Finds free space in the GTT aperture and binds the object there.
2940  */
2941 static int
i915_gem_object_bind_to_gtt(struct drm_i915_gem_object * obj,unsigned alignment,bool map_and_fenceable)2942 i915_gem_object_bind_to_gtt(struct drm_i915_gem_object *obj,
2943 			    unsigned alignment,
2944 			    bool map_and_fenceable)
2945 {
2946 	struct drm_device *dev = obj->base.dev;
2947 	drm_i915_private_t *dev_priv = dev->dev_private;
2948 	struct drm_mm_node *free_space;
2949 	u32 size, fence_size, fence_alignment, unfenced_alignment;
2950 	bool mappable, fenceable;
2951 	int ret;
2952 
2953 	if (obj->madv != I915_MADV_WILLNEED) {
2954 		DRM_ERROR("Attempting to bind a purgeable object\n");
2955 		return -EINVAL;
2956 	}
2957 
2958 	fence_size = i915_gem_get_gtt_size(dev,
2959 					   obj->base.size,
2960 					   obj->tiling_mode);
2961 	fence_alignment = i915_gem_get_gtt_alignment(dev,
2962 						     obj->base.size,
2963 						     obj->tiling_mode);
2964 	unfenced_alignment =
2965 		i915_gem_get_unfenced_gtt_alignment(dev,
2966 						    obj->base.size,
2967 						    obj->tiling_mode);
2968 
2969 	if (alignment == 0)
2970 		alignment = map_and_fenceable ? fence_alignment :
2971 						unfenced_alignment;
2972 	if (map_and_fenceable && alignment & (fence_alignment - 1)) {
2973 		DRM_ERROR("Invalid object alignment requested %u\n", alignment);
2974 		return -EINVAL;
2975 	}
2976 
2977 	size = map_and_fenceable ? fence_size : obj->base.size;
2978 
2979 	/* If the object is bigger than the entire aperture, reject it early
2980 	 * before evicting everything in a vain attempt to find space.
2981 	 */
2982 	if (obj->base.size >
2983 	    (map_and_fenceable ? dev_priv->mm.gtt_mappable_end : dev_priv->mm.gtt_total)) {
2984 		DRM_ERROR("Attempting to bind an object larger than the aperture\n");
2985 		return -E2BIG;
2986 	}
2987 
2988  search_free:
2989 	if (map_and_fenceable)
2990 		free_space = drm_mm_search_free_in_range(
2991 		    &dev_priv->mm.gtt_space, size, alignment, 0,
2992 		    dev_priv->mm.gtt_mappable_end, 0);
2993 	else
2994 		free_space = drm_mm_search_free(&dev_priv->mm.gtt_space,
2995 		    size, alignment, 0);
2996 	if (free_space != NULL) {
2997 		if (map_and_fenceable)
2998 			obj->gtt_space = drm_mm_get_block_range_generic(
2999 			    free_space, size, alignment, 0, 0,
3000 			    dev_priv->mm.gtt_mappable_end, 1);
3001 		else
3002 			obj->gtt_space = drm_mm_get_block_generic(free_space,
3003 			    size, alignment, 0, 1);
3004 	}
3005 	if (obj->gtt_space == NULL) {
3006 		ret = i915_gem_evict_something(dev, size, alignment,
3007 		    map_and_fenceable);
3008 		if (ret != 0)
3009 			return ret;
3010 		goto search_free;
3011 	}
3012 	ret = i915_gem_object_get_pages_gtt(obj, 0);
3013 	if (ret) {
3014 		drm_mm_put_block(obj->gtt_space);
3015 		obj->gtt_space = NULL;
3016 		/*
3017 		 * i915_gem_object_get_pages_gtt() cannot return
3018 		 * ENOMEM, since we use vm_page_grab().
3019 		 */
3020 		return ret;
3021 	}
3022 
3023 	ret = i915_gem_gtt_prepare_object(obj);
3024 	if (ret) {
3025 		i915_gem_object_put_pages_gtt(obj);
3026 		drm_mm_put_block(obj->gtt_space);
3027 		obj->gtt_space = NULL;
3028 		if (i915_gem_evict_everything(dev, false))
3029 			return ret;
3030 		goto search_free;
3031 	}
3032 
3033 	if (!dev_priv->mm.aliasing_ppgtt)
3034 		i915_gem_gtt_bind_object(obj, obj->cache_level);
3035 
3036 	list_add_tail(&obj->gtt_list, &dev_priv->mm.gtt_list);
3037 	list_add_tail(&obj->mm_list, &dev_priv->mm.inactive_list);
3038 
3039 	KASSERT((obj->base.read_domains & I915_GEM_GPU_DOMAINS) == 0,
3040 	    ("Object in gpu read domain"));
3041 	KASSERT((obj->base.write_domain & I915_GEM_GPU_DOMAINS) == 0,
3042 	    ("Object in gpu write domain"));
3043 
3044 	obj->gtt_offset = obj->gtt_space->start;
3045 
3046 	fenceable =
3047 		obj->gtt_space->size == fence_size &&
3048 		(obj->gtt_space->start & (fence_alignment - 1)) == 0;
3049 
3050 	mappable =
3051 		obj->gtt_offset + obj->base.size <= dev_priv->mm.gtt_mappable_end;
3052 
3053 	obj->map_and_fenceable = mappable && fenceable;
3054 
3055 	CTR4(KTR_DRM, "object_bind %p %x %x %d", obj, obj->gtt_offset,
3056 	    obj->base.size, map_and_fenceable);
3057 	return 0;
3058 }
3059 
3060 void
i915_gem_clflush_object(struct drm_i915_gem_object * obj)3061 i915_gem_clflush_object(struct drm_i915_gem_object *obj)
3062 {
3063 	/* If we don't have a page list set up, then we're not pinned
3064 	 * to GPU, and we can ignore the cache flush because it'll happen
3065 	 * again at bind time.
3066 	 */
3067 	if (obj->pages == NULL)
3068 		return;
3069 
3070 	/* If the GPU is snooping the contents of the CPU cache,
3071 	 * we do not need to manually clear the CPU cache lines.  However,
3072 	 * the caches are only snooped when the render cache is
3073 	 * flushed/invalidated.  As we always have to emit invalidations
3074 	 * and flushes when moving into and out of the RENDER domain, correct
3075 	 * snooping behaviour occurs naturally as the result of our domain
3076 	 * tracking.
3077 	 */
3078 	if (obj->cache_level != I915_CACHE_NONE)
3079 		return;
3080 
3081 	CTR1(KTR_DRM, "object_clflush %p", obj);
3082 
3083 	drm_clflush_pages(obj->pages, obj->base.size / PAGE_SIZE);
3084 }
3085 
3086 /** Flushes the GTT write domain for the object if it's dirty. */
3087 static void
i915_gem_object_flush_gtt_write_domain(struct drm_i915_gem_object * obj)3088 i915_gem_object_flush_gtt_write_domain(struct drm_i915_gem_object *obj)
3089 {
3090 	uint32_t old_write_domain;
3091 
3092 	if (obj->base.write_domain != I915_GEM_DOMAIN_GTT)
3093 		return;
3094 
3095 	/* No actual flushing is required for the GTT write domain.  Writes
3096 	 * to it immediately go to main memory as far as we know, so there's
3097 	 * no chipset flush.  It also doesn't land in render cache.
3098 	 *
3099 	 * However, we do have to enforce the order so that all writes through
3100 	 * the GTT land before any writes to the device, such as updates to
3101 	 * the GATT itself.
3102 	 */
3103 	wmb();
3104 
3105 	old_write_domain = obj->base.write_domain;
3106 	obj->base.write_domain = 0;
3107 
3108 	CTR3(KTR_DRM, "object_change_domain flush gtt_write %p %x %x", obj,
3109 	    obj->base.read_domains, old_write_domain);
3110 }
3111 
3112 /** Flushes the CPU write domain for the object if it's dirty. */
3113 static void
i915_gem_object_flush_cpu_write_domain(struct drm_i915_gem_object * obj)3114 i915_gem_object_flush_cpu_write_domain(struct drm_i915_gem_object *obj)
3115 {
3116 	uint32_t old_write_domain;
3117 
3118 	if (obj->base.write_domain != I915_GEM_DOMAIN_CPU)
3119 		return;
3120 
3121 	i915_gem_clflush_object(obj);
3122 	intel_gtt_chipset_flush();
3123 	old_write_domain = obj->base.write_domain;
3124 	obj->base.write_domain = 0;
3125 
3126 	CTR3(KTR_DRM, "object_change_domain flush_cpu_write %p %x %x", obj,
3127 	    obj->base.read_domains, old_write_domain);
3128 }
3129 
3130 static int
i915_gem_object_flush_gpu_write_domain(struct drm_i915_gem_object * obj)3131 i915_gem_object_flush_gpu_write_domain(struct drm_i915_gem_object *obj)
3132 {
3133 
3134 	if ((obj->base.write_domain & I915_GEM_GPU_DOMAINS) == 0)
3135 		return (0);
3136 	return (i915_gem_flush_ring(obj->ring, 0, obj->base.write_domain));
3137 }
3138 
3139 /**
3140  * Moves a single object to the GTT read, and possibly write domain.
3141  *
3142  * This function returns when the move is complete, including waiting on
3143  * flushes to occur.
3144  */
3145 int
i915_gem_object_set_to_gtt_domain(struct drm_i915_gem_object * obj,bool write)3146 i915_gem_object_set_to_gtt_domain(struct drm_i915_gem_object *obj, bool write)
3147 {
3148 	drm_i915_private_t *dev_priv = obj->base.dev->dev_private;
3149 	uint32_t old_write_domain, old_read_domains;
3150 	int ret;
3151 
3152 	/* Not valid to be called on unbound objects. */
3153 	if (obj->gtt_space == NULL)
3154 		return -EINVAL;
3155 
3156 	if (obj->base.write_domain == I915_GEM_DOMAIN_GTT)
3157 		return 0;
3158 
3159 	ret = i915_gem_object_flush_gpu_write_domain(obj);
3160 	if (ret)
3161 		return ret;
3162 
3163 	if (obj->pending_gpu_write || write) {
3164 		ret = i915_gem_object_wait_rendering(obj);
3165 		if (ret)
3166 			return (ret);
3167 	}
3168 
3169 	i915_gem_object_flush_cpu_write_domain(obj);
3170 
3171 	old_write_domain = obj->base.write_domain;
3172 	old_read_domains = obj->base.read_domains;
3173 
3174 	/* It should now be out of any other write domains, and we can update
3175 	 * the domain values for our changes.
3176 	 */
3177 	KASSERT((obj->base.write_domain & ~I915_GEM_DOMAIN_GTT) == 0,
3178 	    ("In GTT write domain"));
3179 	obj->base.read_domains |= I915_GEM_DOMAIN_GTT;
3180 	if (write) {
3181 		obj->base.read_domains = I915_GEM_DOMAIN_GTT;
3182 		obj->base.write_domain = I915_GEM_DOMAIN_GTT;
3183 		obj->dirty = 1;
3184 	}
3185 
3186 	CTR3(KTR_DRM, "object_change_domain set_to_gtt %p %x %x", obj,
3187 	    old_read_domains, old_write_domain);
3188 
3189 	/* And bump the LRU for this access */
3190 	if (i915_gem_object_is_inactive(obj))
3191 		list_move_tail(&obj->mm_list, &dev_priv->mm.inactive_list);
3192 
3193 	return 0;
3194 }
3195 
i915_gem_object_set_cache_level(struct drm_i915_gem_object * obj,enum i915_cache_level cache_level)3196 int i915_gem_object_set_cache_level(struct drm_i915_gem_object *obj,
3197 				    enum i915_cache_level cache_level)
3198 {
3199 	struct drm_device *dev = obj->base.dev;
3200 	drm_i915_private_t *dev_priv = dev->dev_private;
3201 	int ret;
3202 
3203 	if (obj->cache_level == cache_level)
3204 		return 0;
3205 
3206 	if (obj->pin_count) {
3207 		DRM_DEBUG("can not change the cache level of pinned objects\n");
3208 		return -EBUSY;
3209 	}
3210 
3211 	if (obj->gtt_space) {
3212 		ret = i915_gem_object_finish_gpu(obj);
3213 		if (ret)
3214 			return ret;
3215 
3216 		i915_gem_object_finish_gtt(obj);
3217 
3218 		/* Before SandyBridge, you could not use tiling or fence
3219 		 * registers with snooped memory, so relinquish any fences
3220 		 * currently pointing to our region in the aperture.
3221 		 */
3222 		if (INTEL_INFO(obj->base.dev)->gen < 6) {
3223 			ret = i915_gem_object_put_fence(obj);
3224 			if (ret)
3225 				return ret;
3226 		}
3227 
3228 		if (obj->has_global_gtt_mapping)
3229 			i915_gem_gtt_bind_object(obj, cache_level);
3230 		if (obj->has_aliasing_ppgtt_mapping)
3231 			i915_ppgtt_bind_object(dev_priv->mm.aliasing_ppgtt,
3232 					       obj, cache_level);
3233 	}
3234 
3235 	if (cache_level == I915_CACHE_NONE) {
3236 		u32 old_read_domains, old_write_domain;
3237 
3238 		/* If we're coming from LLC cached, then we haven't
3239 		 * actually been tracking whether the data is in the
3240 		 * CPU cache or not, since we only allow one bit set
3241 		 * in obj->write_domain and have been skipping the clflushes.
3242 		 * Just set it to the CPU cache for now.
3243 		 */
3244 		KASSERT((obj->base.write_domain & ~I915_GEM_DOMAIN_CPU) == 0,
3245 		    ("obj %p in CPU write domain", obj));
3246 		KASSERT((obj->base.read_domains & ~I915_GEM_DOMAIN_CPU) == 0,
3247 		    ("obj %p in CPU read domain", obj));
3248 
3249 		old_read_domains = obj->base.read_domains;
3250 		old_write_domain = obj->base.write_domain;
3251 
3252 		obj->base.read_domains = I915_GEM_DOMAIN_CPU;
3253 		obj->base.write_domain = I915_GEM_DOMAIN_CPU;
3254 
3255 		CTR3(KTR_DRM, "object_change_domain set_cache_level %p %x %x",
3256 		    obj, old_read_domains, old_write_domain);
3257 	}
3258 
3259 	obj->cache_level = cache_level;
3260 	return 0;
3261 }
3262 
is_pin_display(struct drm_i915_gem_object * obj)3263 static bool is_pin_display(struct drm_i915_gem_object *obj)
3264 {
3265 	/* There are 3 sources that pin objects:
3266 	 *   1. The display engine (scanouts, sprites, cursors);
3267 	 *   2. Reservations for execbuffer;
3268 	 *   3. The user.
3269 	 *
3270 	 * We can ignore reservations as we hold the struct_mutex and
3271 	 * are only called outside of the reservation path.  The user
3272 	 * can only increment pin_count once, and so if after
3273 	 * subtracting the potential reference by the user, any pin_count
3274 	 * remains, it must be due to another use by the display engine.
3275 	 */
3276 	return obj->pin_count - !!obj->user_pin_count;
3277 }
3278 
3279 int
i915_gem_object_pin_to_display_plane(struct drm_i915_gem_object * obj,u32 alignment,struct intel_ring_buffer * pipelined)3280 i915_gem_object_pin_to_display_plane(struct drm_i915_gem_object *obj,
3281 				     u32 alignment,
3282 				     struct intel_ring_buffer *pipelined)
3283 {
3284 	u32 old_read_domains, old_write_domain;
3285 	int ret;
3286 
3287 	ret = i915_gem_object_flush_gpu_write_domain(obj);
3288 	if (ret)
3289 		return ret;
3290 
3291 	if (pipelined != obj->ring) {
3292 		ret = i915_gem_object_sync(obj, pipelined);
3293 		if (ret)
3294 			return ret;
3295 	}
3296 
3297 	/* Mark the pin_display early so that we account for the
3298 	 * display coherency whilst setting up the cache domains.
3299 	 */
3300 	obj->pin_display = true;
3301 
3302 	/* The display engine is not coherent with the LLC cache on gen6.  As
3303 	 * a result, we make sure that the pinning that is about to occur is
3304 	 * done with uncached PTEs. This is lowest common denominator for all
3305 	 * chipsets.
3306 	 *
3307 	 * However for gen6+, we could do better by using the GFDT bit instead
3308 	 * of uncaching, which would allow us to flush all the LLC-cached data
3309 	 * with that bit in the PTE to main memory with just one PIPE_CONTROL.
3310 	 */
3311 	ret = i915_gem_object_set_cache_level(obj, I915_CACHE_NONE);
3312 	if (ret)
3313 		goto err_unpin_display;
3314 
3315 	/* As the user may map the buffer once pinned in the display plane
3316 	 * (e.g. libkms for the bootup splash), we have to ensure that we
3317 	 * always use map_and_fenceable for all scanout buffers.
3318 	 */
3319 	ret = i915_gem_object_pin(obj, alignment, true);
3320 	if (ret)
3321 		goto err_unpin_display;
3322 
3323 	i915_gem_object_flush_cpu_write_domain(obj);
3324 
3325 	old_write_domain = obj->base.write_domain;
3326 	old_read_domains = obj->base.read_domains;
3327 
3328 	KASSERT((obj->base.write_domain & ~I915_GEM_DOMAIN_GTT) == 0,
3329 	    ("obj %p in GTT write domain", obj));
3330 	obj->base.read_domains |= I915_GEM_DOMAIN_GTT;
3331 
3332 	CTR3(KTR_DRM, "object_change_domain pin_to_display_plan %p %x %x",
3333 	    obj, old_read_domains, obj->base.write_domain);
3334 
3335 	return 0;
3336 
3337 err_unpin_display:
3338 	obj->pin_display = is_pin_display(obj);
3339 	return ret;
3340 }
3341 
3342 void
i915_gem_object_unpin_from_display_plane(struct drm_i915_gem_object * obj)3343 i915_gem_object_unpin_from_display_plane(struct drm_i915_gem_object *obj)
3344 {
3345 	i915_gem_object_unpin(obj);
3346 	obj->pin_display = is_pin_display(obj);
3347 }
3348 
3349 int
i915_gem_object_finish_gpu(struct drm_i915_gem_object * obj)3350 i915_gem_object_finish_gpu(struct drm_i915_gem_object *obj)
3351 {
3352 	int ret;
3353 
3354 	if ((obj->base.read_domains & I915_GEM_GPU_DOMAINS) == 0)
3355 		return 0;
3356 
3357 	if (obj->base.write_domain & I915_GEM_GPU_DOMAINS) {
3358 		ret = i915_gem_flush_ring(obj->ring, 0, obj->base.write_domain);
3359 		if (ret)
3360 			return ret;
3361 	}
3362 
3363 	ret = i915_gem_object_wait_rendering(obj);
3364 	if (ret)
3365 		return ret;
3366 
3367 	/* Ensure that we invalidate the GPU's caches and TLBs. */
3368 	obj->base.read_domains &= ~I915_GEM_GPU_DOMAINS;
3369 	return 0;
3370 }
3371 
3372 /**
3373  * Moves a single object to the CPU read, and possibly write domain.
3374  *
3375  * This function returns when the move is complete, including waiting on
3376  * flushes to occur.
3377  */
3378 int
i915_gem_object_set_to_cpu_domain(struct drm_i915_gem_object * obj,bool write)3379 i915_gem_object_set_to_cpu_domain(struct drm_i915_gem_object *obj, bool write)
3380 {
3381 	uint32_t old_write_domain, old_read_domains;
3382 	int ret;
3383 
3384 	if (obj->base.write_domain == I915_GEM_DOMAIN_CPU)
3385 		return 0;
3386 
3387 	ret = i915_gem_object_flush_gpu_write_domain(obj);
3388 	if (ret)
3389 		return ret;
3390 
3391 	if (write || obj->pending_gpu_write) {
3392 		ret = i915_gem_object_wait_rendering(obj);
3393 		if (ret)
3394 			return ret;
3395 	}
3396 
3397 	i915_gem_object_flush_gtt_write_domain(obj);
3398 
3399 	old_write_domain = obj->base.write_domain;
3400 	old_read_domains = obj->base.read_domains;
3401 
3402 	/* Flush the CPU cache if it's still invalid. */
3403 	if ((obj->base.read_domains & I915_GEM_DOMAIN_CPU) == 0) {
3404 		i915_gem_clflush_object(obj);
3405 
3406 		obj->base.read_domains |= I915_GEM_DOMAIN_CPU;
3407 	}
3408 
3409 	/* It should now be out of any other write domains, and we can update
3410 	 * the domain values for our changes.
3411 	 */
3412 	KASSERT((obj->base.write_domain & ~I915_GEM_DOMAIN_CPU) == 0,
3413 	    ("In cpu write domain"));
3414 
3415 	/* If we're writing through the CPU, then the GPU read domains will
3416 	 * need to be invalidated at next use.
3417 	 */
3418 	if (write) {
3419 		obj->base.read_domains = I915_GEM_DOMAIN_CPU;
3420 		obj->base.write_domain = I915_GEM_DOMAIN_CPU;
3421 	}
3422 
3423 	CTR3(KTR_DRM, "object_change_domain set_to_cpu %p %x %x", obj,
3424 	    old_read_domains, old_write_domain);
3425 
3426 	return 0;
3427 }
3428 
3429 /* Throttle our rendering by waiting until the ring has completed our requests
3430  * emitted over 20 msec ago.
3431  *
3432  * Note that if we were to use the current jiffies each time around the loop,
3433  * we wouldn't escape the function with any frames outstanding if the time to
3434  * render a frame was over 20ms.
3435  *
3436  * This should get us reasonable parallelism between CPU and GPU but also
3437  * relatively low latency when blocking on a particular request to finish.
3438  */
3439 static int
i915_gem_ring_throttle(struct drm_device * dev,struct drm_file * file)3440 i915_gem_ring_throttle(struct drm_device *dev, struct drm_file *file)
3441 {
3442 	struct drm_i915_private *dev_priv = dev->dev_private;
3443 	struct drm_i915_file_private *file_priv = file->driver_priv;
3444 	unsigned long recent_enough = ticks - (20 * hz / 1000);
3445 	struct drm_i915_gem_request *request;
3446 	struct intel_ring_buffer *ring = NULL;
3447 	u32 seqno = 0;
3448 	int ret;
3449 
3450 	if (atomic_load_acq_int(&dev_priv->mm.wedged))
3451 		return -EIO;
3452 
3453 	mtx_lock(&file_priv->mm.lck);
3454 	list_for_each_entry(request, &file_priv->mm.request_list, client_list) {
3455 		if (time_after_eq(request->emitted_jiffies, recent_enough))
3456 			break;
3457 		ring = request->ring;
3458 		seqno = request->seqno;
3459 	}
3460 	mtx_unlock(&file_priv->mm.lck);
3461 	if (seqno == 0)
3462 		return 0;
3463 
3464 	ret = __wait_seqno(ring, seqno, true);
3465 	if (ret == 0)
3466 		taskqueue_enqueue_timeout(dev_priv->tq,
3467 		    &dev_priv->mm.retire_task, 0);
3468 
3469 	return ret;
3470 }
3471 
3472 int
i915_gem_object_pin(struct drm_i915_gem_object * obj,uint32_t alignment,bool map_and_fenceable)3473 i915_gem_object_pin(struct drm_i915_gem_object *obj,
3474 		    uint32_t alignment,
3475 		    bool map_and_fenceable)
3476 {
3477 	int ret;
3478 
3479 	if (obj->pin_count == DRM_I915_GEM_OBJECT_MAX_PIN_COUNT)
3480 		return -EBUSY;
3481 
3482 	if (obj->gtt_space != NULL) {
3483 		if ((alignment && obj->gtt_offset & (alignment - 1)) ||
3484 		    (map_and_fenceable && !obj->map_and_fenceable)) {
3485 			DRM_DEBUG("bo is already pinned with incorrect alignment:"
3486 			     " offset=%x, req.alignment=%x, req.map_and_fenceable=%d,"
3487 			     " obj->map_and_fenceable=%d\n",
3488 			     obj->gtt_offset, alignment,
3489 			     map_and_fenceable,
3490 			     obj->map_and_fenceable);
3491 			ret = i915_gem_object_unbind(obj);
3492 			if (ret)
3493 				return ret;
3494 		}
3495 	}
3496 
3497 	if (obj->gtt_space == NULL) {
3498 		ret = i915_gem_object_bind_to_gtt(obj, alignment,
3499 						  map_and_fenceable);
3500 		if (ret)
3501 			return ret;
3502 	}
3503 
3504 	if (!obj->has_global_gtt_mapping && map_and_fenceable)
3505 		i915_gem_gtt_bind_object(obj, obj->cache_level);
3506 
3507 	obj->pin_count++;
3508 	obj->pin_mappable |= map_and_fenceable;
3509 
3510 	return 0;
3511 }
3512 
3513 void
i915_gem_object_unpin(struct drm_i915_gem_object * obj)3514 i915_gem_object_unpin(struct drm_i915_gem_object *obj)
3515 {
3516 
3517 	KASSERT(obj->pin_count != 0, ("zero pin count"));
3518 	KASSERT(obj->gtt_space != NULL, ("No gtt mapping"));
3519 
3520 	if (--obj->pin_count == 0)
3521 		obj->pin_mappable = false;
3522 }
3523 
3524 int
i915_gem_pin_ioctl(struct drm_device * dev,void * data,struct drm_file * file)3525 i915_gem_pin_ioctl(struct drm_device *dev, void *data,
3526 		   struct drm_file *file)
3527 {
3528 	struct drm_i915_gem_pin *args = data;
3529 	struct drm_i915_gem_object *obj;
3530 	struct drm_gem_object *gobj;
3531 	int ret;
3532 
3533 	ret = i915_mutex_lock_interruptible(dev);
3534 	if (ret)
3535 		return ret;
3536 
3537 	gobj = drm_gem_object_lookup(dev, file, args->handle);
3538 	if (gobj == NULL) {
3539 		ret = -ENOENT;
3540 		goto unlock;
3541 	}
3542 	obj = to_intel_bo(gobj);
3543 
3544 	if (obj->madv != I915_MADV_WILLNEED) {
3545 		DRM_ERROR("Attempting to pin a purgeable buffer\n");
3546 		ret = -EINVAL;
3547 		goto out;
3548 	}
3549 
3550 	if (obj->pin_filp != NULL && obj->pin_filp != file) {
3551 		DRM_ERROR("Already pinned in i915_gem_pin_ioctl(): %d\n",
3552 			  args->handle);
3553 		ret = -EINVAL;
3554 		goto out;
3555 	}
3556 
3557 	obj->user_pin_count++;
3558 	obj->pin_filp = file;
3559 	if (obj->user_pin_count == 1) {
3560 		ret = i915_gem_object_pin(obj, args->alignment, true);
3561 		if (ret)
3562 			goto out;
3563 	}
3564 
3565 	/* XXX - flush the CPU caches for pinned objects
3566 	 * as the X server doesn't manage domains yet
3567 	 */
3568 	i915_gem_object_flush_cpu_write_domain(obj);
3569 	args->offset = obj->gtt_offset;
3570 out:
3571 	drm_gem_object_unreference(&obj->base);
3572 unlock:
3573 	DRM_UNLOCK(dev);
3574 	return ret;
3575 }
3576 
3577 int
i915_gem_unpin_ioctl(struct drm_device * dev,void * data,struct drm_file * file)3578 i915_gem_unpin_ioctl(struct drm_device *dev, void *data,
3579 		     struct drm_file *file)
3580 {
3581 	struct drm_i915_gem_pin *args = data;
3582 	struct drm_i915_gem_object *obj;
3583 	int ret;
3584 
3585 	ret = i915_mutex_lock_interruptible(dev);
3586 	if (ret)
3587 		return ret;
3588 
3589 	obj = to_intel_bo(drm_gem_object_lookup(dev, file, args->handle));
3590 	if (&obj->base == NULL) {
3591 		ret = -ENOENT;
3592 		goto unlock;
3593 	}
3594 
3595 	if (obj->pin_filp != file) {
3596 		DRM_ERROR("Not pinned by caller in i915_gem_pin_ioctl(): %d\n",
3597 			  args->handle);
3598 		ret = -EINVAL;
3599 		goto out;
3600 	}
3601 	obj->user_pin_count--;
3602 	if (obj->user_pin_count == 0) {
3603 		obj->pin_filp = NULL;
3604 		i915_gem_object_unpin(obj);
3605 	}
3606 
3607 out:
3608 	drm_gem_object_unreference(&obj->base);
3609 unlock:
3610 	DRM_UNLOCK(dev);
3611 	return ret;
3612 }
3613 
3614 int
i915_gem_busy_ioctl(struct drm_device * dev,void * data,struct drm_file * file)3615 i915_gem_busy_ioctl(struct drm_device *dev, void *data,
3616 		    struct drm_file *file)
3617 {
3618 	struct drm_i915_gem_busy *args = data;
3619 	struct drm_i915_gem_object *obj;
3620 	int ret;
3621 
3622 	ret = i915_mutex_lock_interruptible(dev);
3623 	if (ret)
3624 		return ret;
3625 
3626 	obj = to_intel_bo(drm_gem_object_lookup(dev, file, args->handle));
3627 	if (&obj->base == NULL) {
3628 		ret = -ENOENT;
3629 		goto unlock;
3630 	}
3631 
3632 	args->busy = obj->active;
3633 	if (args->busy) {
3634 		if (obj->base.write_domain & I915_GEM_GPU_DOMAINS) {
3635 			ret = i915_gem_flush_ring(obj->ring,
3636 			    0, obj->base.write_domain);
3637 		} else {
3638 			ret = i915_gem_check_olr(obj->ring,
3639 						 obj->last_rendering_seqno);
3640 		}
3641 
3642 		i915_gem_retire_requests_ring(obj->ring);
3643 		args->busy = obj->active;
3644 	}
3645 
3646 	drm_gem_object_unreference(&obj->base);
3647 unlock:
3648 	DRM_UNLOCK(dev);
3649 	return ret;
3650 }
3651 
/*
 * Throttle ioctl: thin wrapper that throttles the calling client through
 * i915_gem_ring_throttle().  @data is not used.
 */
int
i915_gem_throttle_ioctl(struct drm_device *dev, void *data,
			struct drm_file *file_priv)
{
	int ret;

	ret = i915_gem_ring_throttle(dev, file_priv);
	return ret;
}
3658 
3659 int
i915_gem_madvise_ioctl(struct drm_device * dev,void * data,struct drm_file * file_priv)3660 i915_gem_madvise_ioctl(struct drm_device *dev, void *data,
3661 		       struct drm_file *file_priv)
3662 {
3663 	struct drm_i915_gem_madvise *args = data;
3664 	struct drm_i915_gem_object *obj;
3665 	int ret;
3666 
3667 	switch (args->madv) {
3668 	case I915_MADV_DONTNEED:
3669 	case I915_MADV_WILLNEED:
3670 	    break;
3671 	default:
3672 	    return -EINVAL;
3673 	}
3674 
3675 	ret = i915_mutex_lock_interruptible(dev);
3676 	if (ret)
3677 		return ret;
3678 
3679 	obj = to_intel_bo(drm_gem_object_lookup(dev, file_priv, args->handle));
3680 	if (&obj->base == NULL) {
3681 		ret = -ENOENT;
3682 		goto unlock;
3683 	}
3684 
3685 	if (obj->pin_count) {
3686 		ret = -EINVAL;
3687 		goto out;
3688 	}
3689 
3690 	if (obj->madv != I915_MADV_PURGED_INTERNAL)
3691 		obj->madv = args->madv;
3692 
3693 	/* if the object is no longer attached, discard its backing storage */
3694 	if (i915_gem_object_is_purgeable(obj) && obj->gtt_space == NULL)
3695 		i915_gem_object_truncate(obj);
3696 
3697 	args->retained = obj->madv != I915_MADV_PURGED_INTERNAL;
3698 
3699 out:
3700 	drm_gem_object_unreference(&obj->base);
3701 unlock:
3702 	DRM_UNLOCK(dev);
3703 	return ret;
3704 }
3705 
/*
 * Allocate and initialise a GEM object of @size bytes.
 *
 * The new object starts in the CPU read and write domains, has no fence
 * register, is marked I915_MADV_WILLNEED, and is LLC-cached when the device
 * has an LLC (uncached otherwise).
 *
 * Returns the object, or NULL when drm_gem_object_init() fails.
 */
struct drm_i915_gem_object *
i915_gem_alloc_object(struct drm_device *dev, size_t size)
{
	struct drm_i915_private *dev_priv = dev->dev_private;
	struct drm_i915_gem_object *obj;

	obj = malloc(sizeof(*obj), DRM_I915_GEM, M_WAITOK | M_ZERO);

	if (drm_gem_object_init(dev, &obj->base, size) != 0) {
		free(obj, DRM_I915_GEM);
		return NULL;
	}

	obj->base.write_domain = I915_GEM_DOMAIN_CPU;
	obj->base.read_domains = I915_GEM_DOMAIN_CPU;

	/*
	 * Some devices let the GPU use the LLC (the CPU cache), which is
	 * worth roughly a 10% performance gain over uncached.  Graphics
	 * requests other than display scanout access that cache coherently
	 * with the CPU, so in this mode the CPU side needs no clflush and the
	 * GPU side only has to flush its internal caches to make data
	 * visible to the CPU.
	 *
	 * Display planes are nevertheless kept UC, so an object must be
	 * rebound when it is first used as one.
	 */
	if (HAS_LLC(dev))
		obj->cache_level = I915_CACHE_LLC;
	else
		obj->cache_level = I915_CACHE_NONE;

	obj->base.driver_private = NULL;
	obj->fence_reg = I915_FENCE_REG_NONE;

	INIT_LIST_HEAD(&obj->mm_list);
	INIT_LIST_HEAD(&obj->gtt_list);
	INIT_LIST_HEAD(&obj->ring_list);
	INIT_LIST_HEAD(&obj->exec_list);
	INIT_LIST_HEAD(&obj->gpu_write_list);

	obj->madv = I915_MADV_WILLNEED;

	/* Spares the first bind a needless unbind call. */
	obj->map_and_fenceable = true;

	i915_gem_info_add_obj(dev_priv, size);

	return obj;
}
3755 
/*
 * GEM object init hook.  It only prints a message noting that it was
 * called and always returns 0; @obj is not touched.
 */
int
i915_gem_init_object(struct drm_gem_object *obj)
{

	printf("i915_gem_init_object called\n");
	return 0;
}
3762 
i915_gem_free_object(struct drm_gem_object * gem_obj)3763 void i915_gem_free_object(struct drm_gem_object *gem_obj)
3764 {
3765 	struct drm_i915_gem_object *obj = to_intel_bo(gem_obj);
3766 	struct drm_device *dev = obj->base.dev;
3767 	drm_i915_private_t *dev_priv = dev->dev_private;
3768 
3769 	CTR1(KTR_DRM, "object_destroy_tail %p", obj);
3770 
3771 	if (obj->phys_obj)
3772 		i915_gem_detach_phys_object(dev, obj);
3773 
3774 	obj->pin_count = 0;
3775 	if (i915_gem_object_unbind(obj) == -ERESTARTSYS) {
3776 		bool was_interruptible;
3777 
3778 		was_interruptible = dev_priv->mm.interruptible;
3779 		dev_priv->mm.interruptible = false;
3780 
3781 		if (i915_gem_object_unbind(obj))
3782 			printf("i915_gem_free_object: unbind\n");
3783 
3784 		dev_priv->mm.interruptible = was_interruptible;
3785 	}
3786 
3787 	drm_gem_free_mmap_offset(&obj->base);
3788 	drm_gem_object_release(&obj->base);
3789 	i915_gem_info_remove_obj(dev_priv, obj->base.size);
3790 
3791 	free(obj->bit_17, DRM_I915_GEM);
3792 	free(obj, DRM_I915_GEM);
3793 }
3794 
3795 int
i915_gem_idle(struct drm_device * dev)3796 i915_gem_idle(struct drm_device *dev)
3797 {
3798 	drm_i915_private_t *dev_priv = dev->dev_private;
3799 	int ret;
3800 
3801 	DRM_LOCK(dev);
3802 
3803 	if (dev_priv->mm.suspended) {
3804 		DRM_UNLOCK(dev);
3805 		return 0;
3806 	}
3807 
3808 	ret = i915_gpu_idle(dev);
3809 	if (ret) {
3810 		DRM_UNLOCK(dev);
3811 		return ret;
3812 	}
3813 	i915_gem_retire_requests(dev);
3814 
3815 	/* Under UMS, be paranoid and evict. */
3816 	if (!drm_core_check_feature(dev, DRIVER_MODESET)) {
3817 		ret = i915_gem_evict_everything(dev, false);
3818 		if (ret) {
3819 			DRM_UNLOCK(dev);
3820 			return ret;
3821 		}
3822 	}
3823 
3824 	i915_gem_reset_fences(dev);
3825 
3826 	/* Hack!  Don't let anybody do execbuf while we don't control the chip.
3827 	 * We need to replace this with a semaphore, or something.
3828 	 * And not confound mm.suspended!
3829 	 */
3830 	dev_priv->mm.suspended = 1;
3831 	callout_stop(&dev_priv->hangcheck_timer);
3832 
3833 	i915_kernel_lost_context(dev);
3834 	i915_gem_cleanup_ringbuffer(dev);
3835 
3836 	DRM_UNLOCK(dev);
3837 
3838 	/* Cancel the retire work handler, which should be idle now. */
3839 	taskqueue_cancel_timeout(dev_priv->tq, &dev_priv->mm.retire_task, NULL);
3840 
3841 	return ret;
3842 }
3843 
i915_gem_init_swizzling(struct drm_device * dev)3844 void i915_gem_init_swizzling(struct drm_device *dev)
3845 {
3846 	drm_i915_private_t *dev_priv = dev->dev_private;
3847 
3848 	if (INTEL_INFO(dev)->gen < 5 ||
3849 	    dev_priv->mm.bit_6_swizzle_x == I915_BIT_6_SWIZZLE_NONE)
3850 		return;
3851 
3852 	I915_WRITE(DISP_ARB_CTL, I915_READ(DISP_ARB_CTL) |
3853 				 DISP_TILE_SURFACE_SWIZZLING);
3854 
3855 	if (IS_GEN5(dev))
3856 		return;
3857 
3858 	I915_WRITE(TILECTL, I915_READ(TILECTL) | TILECTL_SWZCTL);
3859 	if (IS_GEN6(dev))
3860 		I915_WRITE(ARB_MODE, _MASKED_BIT_ENABLE(ARB_MODE_SWIZZLE_SNB));
3861 	else
3862 		I915_WRITE(ARB_MODE, _MASKED_BIT_ENABLE(ARB_MODE_SWIZZLE_IVB));
3863 }
3864 
3865 int
i915_gem_init_hw(struct drm_device * dev)3866 i915_gem_init_hw(struct drm_device *dev)
3867 {
3868 	drm_i915_private_t *dev_priv = dev->dev_private;
3869 	int ret;
3870 
3871 	i915_gem_init_swizzling(dev);
3872 
3873 	ret = intel_init_render_ring_buffer(dev);
3874 	if (ret)
3875 		return ret;
3876 
3877 	if (HAS_BSD(dev)) {
3878 		ret = intel_init_bsd_ring_buffer(dev);
3879 		if (ret)
3880 			goto cleanup_render_ring;
3881 	}
3882 
3883 	if (HAS_BLT(dev)) {
3884 		ret = intel_init_blt_ring_buffer(dev);
3885 		if (ret)
3886 			goto cleanup_bsd_ring;
3887 	}
3888 
3889 	dev_priv->next_seqno = 1;
3890 
3891 	/*
3892 	 * XXX: There was some w/a described somewhere suggesting loading
3893 	 * contexts before PPGTT.
3894 	 */
3895 	i915_gem_context_init(dev);
3896 	i915_gem_init_ppgtt(dev);
3897 
3898 	return 0;
3899 
3900 cleanup_bsd_ring:
3901 	intel_cleanup_ring_buffer(&dev_priv->rings[VCS]);
3902 cleanup_render_ring:
3903 	intel_cleanup_ring_buffer(&dev_priv->rings[RCS]);
3904 	return ret;
3905 }
3906 
3907 static bool
intel_enable_ppgtt(struct drm_device * dev)3908 intel_enable_ppgtt(struct drm_device *dev)
3909 {
3910 	if (i915_enable_ppgtt >= 0)
3911 		return i915_enable_ppgtt;
3912 
3913 	/* Disable ppgtt on SNB if VT-d is on. */
3914 	if (INTEL_INFO(dev)->gen == 6 && intel_iommu_enabled)
3915 		return false;
3916 
3917 	return true;
3918 }
3919 
i915_gem_init(struct drm_device * dev)3920 int i915_gem_init(struct drm_device *dev)
3921 {
3922 	struct drm_i915_private *dev_priv = dev->dev_private;
3923 	unsigned long gtt_size, mappable_size;
3924 	int ret;
3925 
3926 	gtt_size = dev_priv->mm.gtt.gtt_total_entries << PAGE_SHIFT;
3927 	mappable_size = dev_priv->mm.gtt.gtt_mappable_entries << PAGE_SHIFT;
3928 
3929 	DRM_LOCK(dev);
3930 	if (intel_enable_ppgtt(dev) && HAS_ALIASING_PPGTT(dev)) {
3931 		/* PPGTT pdes are stolen from global gtt ptes, so shrink the
3932 		 * aperture accordingly when using aliasing ppgtt. */
3933 		gtt_size -= I915_PPGTT_PD_ENTRIES*PAGE_SIZE;
3934 
3935 		i915_gem_init_global_gtt(dev, 0, mappable_size, gtt_size);
3936 
3937 		ret = i915_gem_init_aliasing_ppgtt(dev);
3938 		if (ret) {
3939 			DRM_UNLOCK(dev);
3940 			return ret;
3941 		}
3942 	} else {
3943 		/* Let GEM Manage all of the aperture.
3944 		 *
3945 		 * However, leave one page at the end still bound to the scratch
3946 		 * page.  There are a number of places where the hardware
3947 		 * apparently prefetches past the end of the object, and we've
3948 		 * seen multiple hangs with the GPU head pointer stuck in a
3949 		 * batchbuffer bound at the last page of the aperture.  One page
3950 		 * should be enough to keep any prefetching inside of the
3951 		 * aperture.
3952 		 */
3953 		i915_gem_init_global_gtt(dev, 0, mappable_size,
3954 					 gtt_size);
3955 	}
3956 
3957 	ret = i915_gem_init_hw(dev);
3958 	DRM_UNLOCK(dev);
3959 	if (ret) {
3960 		i915_gem_cleanup_aliasing_ppgtt(dev);
3961 		return ret;
3962 	}
3963 
3964 	/* Allow hardware batchbuffers unless told otherwise, but not for KMS. */
3965 	if (!drm_core_check_feature(dev, DRIVER_MODESET))
3966 		dev_priv->dri1.allow_batchbuffer = 1;
3967 	return 0;
3968 }
3969 
3970 void
i915_gem_cleanup_ringbuffer(struct drm_device * dev)3971 i915_gem_cleanup_ringbuffer(struct drm_device *dev)
3972 {
3973 	drm_i915_private_t *dev_priv = dev->dev_private;
3974 	struct intel_ring_buffer *ring;
3975 	int i;
3976 
3977 	for_each_ring(ring, dev_priv, i)
3978 		intel_cleanup_ring_buffer(ring);
3979 }
3980 
3981 int
i915_gem_entervt_ioctl(struct drm_device * dev,void * data,struct drm_file * file_priv)3982 i915_gem_entervt_ioctl(struct drm_device *dev, void *data,
3983 		       struct drm_file *file_priv)
3984 {
3985 	drm_i915_private_t *dev_priv = dev->dev_private;
3986 	int ret;
3987 
3988 	if (drm_core_check_feature(dev, DRIVER_MODESET))
3989 		return 0;
3990 
3991 	if (atomic_load_acq_int(&dev_priv->mm.wedged) != 0) {
3992 		DRM_ERROR("Reenabling wedged hardware, good luck\n");
3993 		atomic_store_rel_int(&dev_priv->mm.wedged, 0);
3994 	}
3995 
3996 	DRM_LOCK(dev);
3997 	dev_priv->mm.suspended = 0;
3998 
3999 	ret = i915_gem_init_hw(dev);
4000 	if (ret != 0) {
4001 		DRM_UNLOCK(dev);
4002 		return ret;
4003 	}
4004 
4005 	KASSERT(list_empty(&dev_priv->mm.active_list), ("active list"));
4006 	KASSERT(list_empty(&dev_priv->mm.flushing_list), ("flushing list"));
4007 	KASSERT(list_empty(&dev_priv->mm.inactive_list), ("inactive list"));
4008 	DRM_UNLOCK(dev);
4009 
4010 	ret = drm_irq_install(dev);
4011 	if (ret)
4012 		goto cleanup_ringbuffer;
4013 
4014 	return 0;
4015 
4016 cleanup_ringbuffer:
4017 	DRM_LOCK(dev);
4018 	i915_gem_cleanup_ringbuffer(dev);
4019 	dev_priv->mm.suspended = 1;
4020 	DRM_UNLOCK(dev);
4021 
4022 	return ret;
4023 }
4024 
4025 int
i915_gem_leavevt_ioctl(struct drm_device * dev,void * data,struct drm_file * file_priv)4026 i915_gem_leavevt_ioctl(struct drm_device *dev, void *data,
4027 		       struct drm_file *file_priv)
4028 {
4029 	if (drm_core_check_feature(dev, DRIVER_MODESET))
4030 		return 0;
4031 
4032 	drm_irq_uninstall(dev);
4033 	return i915_gem_idle(dev);
4034 }
4035 
4036 void
i915_gem_lastclose(struct drm_device * dev)4037 i915_gem_lastclose(struct drm_device *dev)
4038 {
4039 	int ret;
4040 
4041 	if (drm_core_check_feature(dev, DRIVER_MODESET))
4042 		return;
4043 
4044 	ret = i915_gem_idle(dev);
4045 	if (ret)
4046 		DRM_ERROR("failed to idle hardware: %d\n", ret);
4047 }
4048 
4049 static void
init_ring_lists(struct intel_ring_buffer * ring)4050 init_ring_lists(struct intel_ring_buffer *ring)
4051 {
4052 	INIT_LIST_HEAD(&ring->active_list);
4053 	INIT_LIST_HEAD(&ring->request_list);
4054 	INIT_LIST_HEAD(&ring->gpu_write_list);
4055 }
4056 
4057 void
i915_gem_load(struct drm_device * dev)4058 i915_gem_load(struct drm_device *dev)
4059 {
4060 	int i;
4061 	drm_i915_private_t *dev_priv = dev->dev_private;
4062 
4063 	INIT_LIST_HEAD(&dev_priv->mm.active_list);
4064 	INIT_LIST_HEAD(&dev_priv->mm.flushing_list);
4065 	INIT_LIST_HEAD(&dev_priv->mm.inactive_list);
4066 	INIT_LIST_HEAD(&dev_priv->mm.fence_list);
4067 	INIT_LIST_HEAD(&dev_priv->mm.gtt_list);
4068 	for (i = 0; i < I915_NUM_RINGS; i++)
4069 		init_ring_lists(&dev_priv->rings[i]);
4070 	for (i = 0; i < I915_MAX_NUM_FENCES; i++)
4071 		INIT_LIST_HEAD(&dev_priv->fence_regs[i].lru_list);
4072 	TIMEOUT_TASK_INIT(dev_priv->tq, &dev_priv->mm.retire_task, 0,
4073 	    i915_gem_retire_task_handler, dev_priv);
4074 	dev_priv->error_completion = 0;
4075 
4076 	/* On GEN3 we really need to make sure the ARB C3 LP bit is set */
4077 	if (IS_GEN3(dev)) {
4078 		I915_WRITE(MI_ARB_STATE,
4079 			   _MASKED_BIT_ENABLE(MI_ARB_C3_LP_WRITE_ENABLE));
4080 	}
4081 
4082 	dev_priv->relative_constants_mode = I915_EXEC_CONSTANTS_REL_GENERAL;
4083 
4084 	/* Old X drivers will take 0-2 for front, back, depth buffers */
4085 	if (!drm_core_check_feature(dev, DRIVER_MODESET))
4086 		dev_priv->fence_reg_start = 3;
4087 
4088 	if (INTEL_INFO(dev)->gen >= 4 || IS_I945G(dev) || IS_I945GM(dev) || IS_G33(dev))
4089 		dev_priv->num_fence_regs = 16;
4090 	else
4091 		dev_priv->num_fence_regs = 8;
4092 
4093 	/* Initialize fence registers to zero */
4094 	i915_gem_reset_fences(dev);
4095 
4096 	i915_gem_detect_bit_6_swizzle(dev);
4097 	dev_priv->mm.interruptible = true;
4098 
4099 	dev_priv->mm.i915_lowmem = EVENTHANDLER_REGISTER(vm_lowmem,
4100 	    i915_gem_lowmem, dev, EVENTHANDLER_PRI_ANY);
4101 }
4102 
4103 void
i915_gem_unload(struct drm_device * dev)4104 i915_gem_unload(struct drm_device *dev)
4105 {
4106 	struct drm_i915_private *dev_priv;
4107 
4108 	dev_priv = dev->dev_private;
4109 	EVENTHANDLER_DEREGISTER(vm_lowmem, dev_priv->mm.i915_lowmem);
4110 }
4111 
4112 /*
4113  * Create a physically contiguous memory object for this object
4114  * e.g. for cursor + overlay regs
4115  */
i915_gem_init_phys_object(struct drm_device * dev,int id,int size,int align)4116 static int i915_gem_init_phys_object(struct drm_device *dev,
4117 				     int id, int size, int align)
4118 {
4119 	drm_i915_private_t *dev_priv = dev->dev_private;
4120 	struct drm_i915_gem_phys_object *phys_obj;
4121 	int ret;
4122 
4123 	if (dev_priv->mm.phys_objs[id - 1] || !size)
4124 		return 0;
4125 
4126 	phys_obj = malloc(sizeof(struct drm_i915_gem_phys_object),
4127 	    DRM_I915_GEM, M_WAITOK | M_ZERO);
4128 
4129 	phys_obj->id = id;
4130 
4131 	phys_obj->handle = drm_pci_alloc(dev, size, align, BUS_SPACE_MAXADDR);
4132 	if (!phys_obj->handle) {
4133 		ret = -ENOMEM;
4134 		goto kfree_obj;
4135 	}
4136 	pmap_change_attr((vm_offset_t)phys_obj->handle->vaddr,
4137 	    size / PAGE_SIZE, PAT_WRITE_COMBINING);
4138 
4139 	dev_priv->mm.phys_objs[id - 1] = phys_obj;
4140 
4141 	return 0;
4142 kfree_obj:
4143 	free(phys_obj, DRM_I915_GEM);
4144 	return ret;
4145 }
4146 
i915_gem_free_phys_object(struct drm_device * dev,int id)4147 static void i915_gem_free_phys_object(struct drm_device *dev, int id)
4148 {
4149 	drm_i915_private_t *dev_priv = dev->dev_private;
4150 	struct drm_i915_gem_phys_object *phys_obj;
4151 
4152 	if (!dev_priv->mm.phys_objs[id - 1])
4153 		return;
4154 
4155 	phys_obj = dev_priv->mm.phys_objs[id - 1];
4156 	if (phys_obj->cur_obj) {
4157 		i915_gem_detach_phys_object(dev, phys_obj->cur_obj);
4158 	}
4159 
4160 	drm_pci_free(dev, phys_obj->handle);
4161 	free(phys_obj, DRM_I915_GEM);
4162 	dev_priv->mm.phys_objs[id - 1] = NULL;
4163 }
4164 
i915_gem_free_all_phys_object(struct drm_device * dev)4165 void i915_gem_free_all_phys_object(struct drm_device *dev)
4166 {
4167 	int i;
4168 
4169 	for (i = I915_GEM_PHYS_CURSOR_0; i <= I915_MAX_PHYS_OBJECT; i++)
4170 		i915_gem_free_phys_object(dev, i);
4171 }
4172 
i915_gem_detach_phys_object(struct drm_device * dev,struct drm_i915_gem_object * obj)4173 void i915_gem_detach_phys_object(struct drm_device *dev,
4174 				 struct drm_i915_gem_object *obj)
4175 {
4176 	vm_page_t page;
4177 	struct sf_buf *sf;
4178 	char *vaddr, *dst;
4179 	int i, page_count;
4180 
4181 	if (!obj->phys_obj)
4182 		return;
4183 	vaddr = obj->phys_obj->handle->vaddr;
4184 
4185 	page_count = obj->base.size / PAGE_SIZE;
4186 	VM_OBJECT_WLOCK(obj->base.vm_obj);
4187 	for (i = 0; i < page_count; i++) {
4188 		page = i915_gem_wire_page(obj->base.vm_obj, i, NULL);
4189 		if (page == NULL)
4190 			continue; /* XXX */
4191 
4192 		VM_OBJECT_WUNLOCK(obj->base.vm_obj);
4193 		sf = sf_buf_alloc(page, 0);
4194 		if (sf != NULL) {
4195 			dst = (char *)sf_buf_kva(sf);
4196 			memcpy(dst, vaddr + IDX_TO_OFF(i), PAGE_SIZE);
4197 			sf_buf_free(sf);
4198 		}
4199 		drm_clflush_pages(&page, 1);
4200 
4201 		VM_OBJECT_WLOCK(obj->base.vm_obj);
4202 		vm_page_reference(page);
4203 		vm_page_lock(page);
4204 		vm_page_dirty(page);
4205 		vm_page_unwire(page, PQ_INACTIVE);
4206 		vm_page_unlock(page);
4207 		atomic_add_long(&i915_gem_wired_pages_cnt, -1);
4208 	}
4209 	VM_OBJECT_WUNLOCK(obj->base.vm_obj);
4210 	intel_gtt_chipset_flush();
4211 
4212 	obj->phys_obj->cur_obj = NULL;
4213 	obj->phys_obj = NULL;
4214 }
4215 
4216 int
i915_gem_attach_phys_object(struct drm_device * dev,struct drm_i915_gem_object * obj,int id,int align)4217 i915_gem_attach_phys_object(struct drm_device *dev,
4218 			    struct drm_i915_gem_object *obj,
4219 			    int id,
4220 			    int align)
4221 {
4222 	drm_i915_private_t *dev_priv = dev->dev_private;
4223 	vm_page_t page;
4224 	struct sf_buf *sf;
4225 	char *dst, *src;
4226 	int ret = 0;
4227 	int page_count;
4228 	int i;
4229 
4230 	if (id > I915_MAX_PHYS_OBJECT)
4231 		return -EINVAL;
4232 
4233 	if (obj->phys_obj) {
4234 		if (obj->phys_obj->id == id)
4235 			return 0;
4236 		i915_gem_detach_phys_object(dev, obj);
4237 	}
4238 
4239 	/* create a new object */
4240 	if (!dev_priv->mm.phys_objs[id - 1]) {
4241 		ret = i915_gem_init_phys_object(dev, id,
4242 						obj->base.size, align);
4243 		if (ret) {
4244 			DRM_ERROR("failed to init phys object %d size: %zu\n",
4245 				  id, obj->base.size);
4246 			return ret;
4247 		}
4248 	}
4249 
4250 	/* bind to the object */
4251 	obj->phys_obj = dev_priv->mm.phys_objs[id - 1];
4252 	obj->phys_obj->cur_obj = obj;
4253 
4254 	page_count = obj->base.size / PAGE_SIZE;
4255 
4256 	VM_OBJECT_WLOCK(obj->base.vm_obj);
4257 	for (i = 0; i < page_count; i++) {
4258 		page = i915_gem_wire_page(obj->base.vm_obj, i, NULL);
4259 		if (page == NULL) {
4260 			ret = -EIO;
4261 			break;
4262 		}
4263 		VM_OBJECT_WUNLOCK(obj->base.vm_obj);
4264 		sf = sf_buf_alloc(page, 0);
4265 		src = (char *)sf_buf_kva(sf);
4266 		dst = (char *)obj->phys_obj->handle->vaddr + IDX_TO_OFF(i);
4267 		memcpy(dst, src, PAGE_SIZE);
4268 		sf_buf_free(sf);
4269 
4270 		VM_OBJECT_WLOCK(obj->base.vm_obj);
4271 
4272 		vm_page_reference(page);
4273 		vm_page_lock(page);
4274 		vm_page_unwire(page, PQ_INACTIVE);
4275 		vm_page_unlock(page);
4276 		atomic_add_long(&i915_gem_wired_pages_cnt, -1);
4277 	}
4278 	VM_OBJECT_WUNLOCK(obj->base.vm_obj);
4279 
4280 	return ret;
4281 }
4282 
4283 static int
i915_gem_phys_pwrite(struct drm_device * dev,struct drm_i915_gem_object * obj,struct drm_i915_gem_pwrite * args,struct drm_file * file_priv)4284 i915_gem_phys_pwrite(struct drm_device *dev,
4285 		     struct drm_i915_gem_object *obj,
4286 		     struct drm_i915_gem_pwrite *args,
4287 		     struct drm_file *file_priv)
4288 {
4289 	void *vaddr = (char *)obj->phys_obj->handle->vaddr + args->offset;
4290 	char __user *user_data = to_user_ptr(args->data_ptr);
4291 
4292 	if (__copy_from_user_inatomic_nocache(vaddr, user_data, args->size)) {
4293 		unsigned long unwritten;
4294 
4295 		/* The physical object once assigned is fixed for the lifetime
4296 		 * of the obj, so we can safely drop the lock and continue
4297 		 * to access vaddr.
4298 		 */
4299 		DRM_UNLOCK(dev);
4300 		unwritten = copy_from_user(vaddr, user_data, args->size);
4301 		DRM_LOCK(dev);
4302 		if (unwritten)
4303 			return -EFAULT;
4304 	}
4305 
4306 	i915_gem_chipset_flush(dev);
4307 	return 0;
4308 }
4309 
i915_gem_release(struct drm_device * dev,struct drm_file * file)4310 void i915_gem_release(struct drm_device *dev, struct drm_file *file)
4311 {
4312 	struct drm_i915_file_private *file_priv = file->driver_priv;
4313 
4314 	/* Clean up our request list when the client is going away, so that
4315 	 * later retire_requests won't dereference our soon-to-be-gone
4316 	 * file_priv.
4317 	 */
4318 	mtx_lock(&file_priv->mm.lck);
4319 	while (!list_empty(&file_priv->mm.request_list)) {
4320 		struct drm_i915_gem_request *request;
4321 
4322 		request = list_first_entry(&file_priv->mm.request_list,
4323 					   struct drm_i915_gem_request,
4324 					   client_list);
4325 		list_del(&request->client_list);
4326 		request->file_priv = NULL;
4327 	}
4328 	mtx_unlock(&file_priv->mm.lck);
4329 }
4330 
4331 static vm_page_t
i915_gem_wire_page(vm_object_t object,vm_pindex_t pindex,bool * fresh)4332 i915_gem_wire_page(vm_object_t object, vm_pindex_t pindex, bool *fresh)
4333 {
4334 	vm_page_t page;
4335 	int rv;
4336 
4337 	VM_OBJECT_ASSERT_WLOCKED(object);
4338 	page = vm_page_grab(object, pindex, VM_ALLOC_NORMAL);
4339 	if (page->valid != VM_PAGE_BITS_ALL) {
4340 		if (vm_pager_has_page(object, pindex, NULL, NULL)) {
4341 			rv = vm_pager_get_pages(object, &page, 1, NULL, NULL);
4342 			if (rv != VM_PAGER_OK) {
4343 				vm_page_lock(page);
4344 				vm_page_free(page);
4345 				vm_page_unlock(page);
4346 				return (NULL);
4347 			}
4348 			if (fresh != NULL)
4349 				*fresh = true;
4350 		} else {
4351 			pmap_zero_page(page);
4352 			page->valid = VM_PAGE_BITS_ALL;
4353 			page->dirty = 0;
4354 			if (fresh != NULL)
4355 				*fresh = false;
4356 		}
4357 	} else if (fresh != NULL) {
4358 		*fresh = false;
4359 	}
4360 	vm_page_lock(page);
4361 	vm_page_wire(page);
4362 	vm_page_unlock(page);
4363 	vm_page_xunbusy(page);
4364 	atomic_add_long(&i915_gem_wired_pages_cnt, 1);
4365 	return (page);
4366 }
4367 
4368 #undef __user
4369 #undef __force
4370 #undef __iomem
4371 #undef __must_check
4372 #undef to_user_ptr
4373 #undef offset_in_page
4374 #undef page_to_phys
4375