xref: /freebsd-11-stable/sys/dev/drm2/i915/intel_ringbuffer.c (revision 7d536dc855c85c15bf45f033d108a61b1f3cecc3)
1 /*
2  * Copyright © 2008-2010 Intel Corporation
3  *
4  * Permission is hereby granted, free of charge, to any person obtaining a
5  * copy of this software and associated documentation files (the "Software"),
6  * to deal in the Software without restriction, including without limitation
7  * the rights to use, copy, modify, merge, publish, distribute, sublicense,
8  * and/or sell copies of the Software, and to permit persons to whom the
9  * Software is furnished to do so, subject to the following conditions:
10  *
11  * The above copyright notice and this permission notice (including the next
12  * paragraph) shall be included in all copies or substantial portions of the
13  * Software.
14  *
15  * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
16  * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
17  * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT.  IN NO EVENT SHALL
18  * THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
19  * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING
20  * FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS
21  * IN THE SOFTWARE.
22  *
23  * Authors:
24  *    Eric Anholt <eric@anholt.net>
25  *    Zou Nan hai <nanhai.zou@intel.com>
26  *    Xiang Hai hao<haihao.xiang@intel.com>
27  *
28  */
29 
30 #include <sys/cdefs.h>
31 __FBSDID("$FreeBSD$");
32 
33 #include <dev/drm2/drmP.h>
34 #include <dev/drm2/i915/i915_drv.h>
35 #include <dev/drm2/i915/i915_drm.h>
36 #include <dev/drm2/i915/intel_drv.h>
37 #include <sys/sched.h>
38 #include <sys/sf_buf.h>
39 
40 /*
41  * 965+ support PIPE_CONTROL commands, which provide finer grained control
42  * over cache flushing.
43  */
44 struct pipe_control {
45 	struct drm_i915_gem_object *obj;
46 	volatile u32 *cpu_page;
47 	u32 gtt_offset;
48 };
49 
ring_space(struct intel_ring_buffer * ring)50 static inline int ring_space(struct intel_ring_buffer *ring)
51 {
52 	int space = (ring->head & HEAD_ADDR) - (ring->tail + I915_RING_FREE_SPACE);
53 	if (space < 0)
54 		space += ring->size;
55 	return space;
56 }
57 
58 static int
gen2_render_ring_flush(struct intel_ring_buffer * ring,u32 invalidate_domains,u32 flush_domains)59 gen2_render_ring_flush(struct intel_ring_buffer *ring,
60 		       u32	invalidate_domains,
61 		       u32	flush_domains)
62 {
63 	u32 cmd;
64 	int ret;
65 
66 	cmd = MI_FLUSH;
67 	if (((invalidate_domains|flush_domains) & I915_GEM_DOMAIN_RENDER) == 0)
68 		cmd |= MI_NO_WRITE_FLUSH;
69 
70 	if (invalidate_domains & I915_GEM_DOMAIN_SAMPLER)
71 		cmd |= MI_READ_FLUSH;
72 
73 	ret = intel_ring_begin(ring, 2);
74 	if (ret)
75 		return ret;
76 
77 	intel_ring_emit(ring, cmd);
78 	intel_ring_emit(ring, MI_NOOP);
79 	intel_ring_advance(ring);
80 
81 	return 0;
82 }
83 
84 static int
gen4_render_ring_flush(struct intel_ring_buffer * ring,u32 invalidate_domains,u32 flush_domains)85 gen4_render_ring_flush(struct intel_ring_buffer *ring,
86 		       u32	invalidate_domains,
87 		       u32	flush_domains)
88 {
89 	struct drm_device *dev = ring->dev;
90 	u32 cmd;
91 	int ret;
92 
93 	/*
94 	 * read/write caches:
95 	 *
96 	 * I915_GEM_DOMAIN_RENDER is always invalidated, but is
97 	 * only flushed if MI_NO_WRITE_FLUSH is unset.  On 965, it is
98 	 * also flushed at 2d versus 3d pipeline switches.
99 	 *
100 	 * read-only caches:
101 	 *
102 	 * I915_GEM_DOMAIN_SAMPLER is flushed on pre-965 if
103 	 * MI_READ_FLUSH is set, and is always flushed on 965.
104 	 *
105 	 * I915_GEM_DOMAIN_COMMAND may not exist?
106 	 *
107 	 * I915_GEM_DOMAIN_INSTRUCTION, which exists on 965, is
108 	 * invalidated when MI_EXE_FLUSH is set.
109 	 *
110 	 * I915_GEM_DOMAIN_VERTEX, which exists on 965, is
111 	 * invalidated with every MI_FLUSH.
112 	 *
113 	 * TLBs:
114 	 *
115 	 * On 965, TLBs associated with I915_GEM_DOMAIN_COMMAND
116 	 * and I915_GEM_DOMAIN_CPU in are invalidated at PTE write and
117 	 * I915_GEM_DOMAIN_RENDER and I915_GEM_DOMAIN_SAMPLER
118 	 * are flushed at any MI_FLUSH.
119 	 */
120 
121 	cmd = MI_FLUSH | MI_NO_WRITE_FLUSH;
122 	if ((invalidate_domains|flush_domains) & I915_GEM_DOMAIN_RENDER)
123 		cmd &= ~MI_NO_WRITE_FLUSH;
124 	if (invalidate_domains & I915_GEM_DOMAIN_INSTRUCTION)
125 		cmd |= MI_EXE_FLUSH;
126 
127 	if (invalidate_domains & I915_GEM_DOMAIN_COMMAND &&
128 	    (IS_G4X(dev) || IS_GEN5(dev)))
129 		cmd |= MI_INVALIDATE_ISP;
130 
131 	ret = intel_ring_begin(ring, 2);
132 	if (ret)
133 		return ret;
134 
135 	intel_ring_emit(ring, cmd);
136 	intel_ring_emit(ring, MI_NOOP);
137 	intel_ring_advance(ring);
138 
139 	return 0;
140 }
141 
142 /**
143  * Emits a PIPE_CONTROL with a non-zero post-sync operation, for
144  * implementing two workarounds on gen6.  From section 1.4.7.1
145  * "PIPE_CONTROL" of the Sandy Bridge PRM volume 2 part 1:
146  *
147  * [DevSNB-C+{W/A}] Before any depth stall flush (including those
148  * produced by non-pipelined state commands), software needs to first
149  * send a PIPE_CONTROL with no bits set except Post-Sync Operation !=
150  * 0.
151  *
152  * [Dev-SNB{W/A}]: Before a PIPE_CONTROL with Write Cache Flush Enable
153  * =1, a PIPE_CONTROL with any non-zero post-sync-op is required.
154  *
155  * And the workaround for these two requires this workaround first:
156  *
157  * [Dev-SNB{W/A}]: Pipe-control with CS-stall bit set must be sent
158  * BEFORE the pipe-control with a post-sync op and no write-cache
159  * flushes.
160  *
161  * And this last workaround is tricky because of the requirements on
162  * that bit.  From section 1.4.7.2.3 "Stall" of the Sandy Bridge PRM
163  * volume 2 part 1:
164  *
165  *     "1 of the following must also be set:
166  *      - Render Target Cache Flush Enable ([12] of DW1)
167  *      - Depth Cache Flush Enable ([0] of DW1)
168  *      - Stall at Pixel Scoreboard ([1] of DW1)
169  *      - Depth Stall ([13] of DW1)
170  *      - Post-Sync Operation ([13] of DW1)
171  *      - Notify Enable ([8] of DW1)"
172  *
173  * The cache flushes require the workaround flush that triggered this
174  * one, so we can't use it.  Depth stall would trigger the same.
175  * Post-sync nonzero is what triggered this second workaround, so we
176  * can't use that one either.  Notify enable is IRQs, which aren't
177  * really our business.  That leaves only stall at scoreboard.
178  */
179 static int
intel_emit_post_sync_nonzero_flush(struct intel_ring_buffer * ring)180 intel_emit_post_sync_nonzero_flush(struct intel_ring_buffer *ring)
181 {
182 	struct pipe_control *pc = ring->private;
183 	u32 scratch_addr = pc->gtt_offset + 128;
184 	int ret;
185 
186 
187 	ret = intel_ring_begin(ring, 6);
188 	if (ret)
189 		return ret;
190 
191 	intel_ring_emit(ring, GFX_OP_PIPE_CONTROL(5));
192 	intel_ring_emit(ring, PIPE_CONTROL_CS_STALL |
193 			PIPE_CONTROL_STALL_AT_SCOREBOARD);
194 	intel_ring_emit(ring, scratch_addr | PIPE_CONTROL_GLOBAL_GTT); /* address */
195 	intel_ring_emit(ring, 0); /* low dword */
196 	intel_ring_emit(ring, 0); /* high dword */
197 	intel_ring_emit(ring, MI_NOOP);
198 	intel_ring_advance(ring);
199 
200 	ret = intel_ring_begin(ring, 6);
201 	if (ret)
202 		return ret;
203 
204 	intel_ring_emit(ring, GFX_OP_PIPE_CONTROL(5));
205 	intel_ring_emit(ring, PIPE_CONTROL_QW_WRITE);
206 	intel_ring_emit(ring, scratch_addr | PIPE_CONTROL_GLOBAL_GTT); /* address */
207 	intel_ring_emit(ring, 0);
208 	intel_ring_emit(ring, 0);
209 	intel_ring_emit(ring, MI_NOOP);
210 	intel_ring_advance(ring);
211 
212 	return 0;
213 }
214 
215 static int
gen6_render_ring_flush(struct intel_ring_buffer * ring,u32 invalidate_domains,u32 flush_domains)216 gen6_render_ring_flush(struct intel_ring_buffer *ring,
217                          u32 invalidate_domains, u32 flush_domains)
218 {
219 	u32 flags = 0;
220 	struct pipe_control *pc = ring->private;
221 	u32 scratch_addr = pc->gtt_offset + 128;
222 	int ret;
223 
224 	/* Force SNB workarounds for PIPE_CONTROL flushes */
225 	ret = intel_emit_post_sync_nonzero_flush(ring);
226 	if (ret)
227 		return ret;
228 
229 	/* Just flush everything.  Experiments have shown that reducing the
230 	 * number of bits based on the write domains has little performance
231 	 * impact.
232 	 */
233 	if (flush_domains) {
234 		flags |= PIPE_CONTROL_RENDER_TARGET_CACHE_FLUSH;
235 		flags |= PIPE_CONTROL_DEPTH_CACHE_FLUSH;
236 		/*
237 		 * Ensure that any following seqno writes only happen
238 		 * when the render cache is indeed flushed.
239 		 */
240 		flags |= PIPE_CONTROL_CS_STALL;
241 	}
242 	if (invalidate_domains) {
243 		flags |= PIPE_CONTROL_TLB_INVALIDATE;
244 		flags |= PIPE_CONTROL_INSTRUCTION_CACHE_INVALIDATE;
245 		flags |= PIPE_CONTROL_TEXTURE_CACHE_INVALIDATE;
246 		flags |= PIPE_CONTROL_VF_CACHE_INVALIDATE;
247 		flags |= PIPE_CONTROL_CONST_CACHE_INVALIDATE;
248 		flags |= PIPE_CONTROL_STATE_CACHE_INVALIDATE;
249 		/*
250 		 * TLB invalidate requires a post-sync write.
251 		 */
252 		flags |= PIPE_CONTROL_QW_WRITE | PIPE_CONTROL_CS_STALL;
253 	}
254 
255 	ret = intel_ring_begin(ring, 4);
256 	if (ret)
257 		return ret;
258 
259 	intel_ring_emit(ring, GFX_OP_PIPE_CONTROL(4));
260 	intel_ring_emit(ring, flags);
261 	intel_ring_emit(ring, scratch_addr | PIPE_CONTROL_GLOBAL_GTT);
262 	intel_ring_emit(ring, 0);
263 	intel_ring_advance(ring);
264 
265 	return 0;
266 }
267 
268 static int
gen7_render_ring_cs_stall_wa(struct intel_ring_buffer * ring)269 gen7_render_ring_cs_stall_wa(struct intel_ring_buffer *ring)
270 {
271 	int ret;
272 
273 	ret = intel_ring_begin(ring, 4);
274 	if (ret)
275 		return ret;
276 
277 	intel_ring_emit(ring, GFX_OP_PIPE_CONTROL(4));
278 	intel_ring_emit(ring, PIPE_CONTROL_CS_STALL |
279 			      PIPE_CONTROL_STALL_AT_SCOREBOARD);
280 	intel_ring_emit(ring, 0);
281 	intel_ring_emit(ring, 0);
282 	intel_ring_advance(ring);
283 
284 	return 0;
285 }
286 
287 static int
gen7_render_ring_flush(struct intel_ring_buffer * ring,u32 invalidate_domains,u32 flush_domains)288 gen7_render_ring_flush(struct intel_ring_buffer *ring,
289 		       u32 invalidate_domains, u32 flush_domains)
290 {
291 	u32 flags = 0;
292 	struct pipe_control *pc = ring->private;
293 	u32 scratch_addr = pc->gtt_offset + 128;
294 	int ret;
295 
296 	/*
297 	 * Ensure that any following seqno writes only happen when the render
298 	 * cache is indeed flushed.
299 	 *
300 	 * Workaround: 4th PIPE_CONTROL command (except the ones with only
301 	 * read-cache invalidate bits set) must have the CS_STALL bit set. We
302 	 * don't try to be clever and just set it unconditionally.
303 	 */
304 	flags |= PIPE_CONTROL_CS_STALL;
305 
306 	/* Just flush everything.  Experiments have shown that reducing the
307 	 * number of bits based on the write domains has little performance
308 	 * impact.
309 	 */
310 	if (flush_domains) {
311 		flags |= PIPE_CONTROL_RENDER_TARGET_CACHE_FLUSH;
312 		flags |= PIPE_CONTROL_DEPTH_CACHE_FLUSH;
313 	}
314 	if (invalidate_domains) {
315 		flags |= PIPE_CONTROL_TLB_INVALIDATE;
316 		flags |= PIPE_CONTROL_INSTRUCTION_CACHE_INVALIDATE;
317 		flags |= PIPE_CONTROL_TEXTURE_CACHE_INVALIDATE;
318 		flags |= PIPE_CONTROL_VF_CACHE_INVALIDATE;
319 		flags |= PIPE_CONTROL_CONST_CACHE_INVALIDATE;
320 		flags |= PIPE_CONTROL_STATE_CACHE_INVALIDATE;
321 		/*
322 		 * TLB invalidate requires a post-sync write.
323 		 */
324 		flags |= PIPE_CONTROL_QW_WRITE;
325 
326 		/* Workaround: we must issue a pipe_control with CS-stall bit
327 		 * set before a pipe_control command that has the state cache
328 		 * invalidate bit set. */
329 		gen7_render_ring_cs_stall_wa(ring);
330 	}
331 
332 	ret = intel_ring_begin(ring, 4);
333 	if (ret)
334 		return ret;
335 
336 	intel_ring_emit(ring, GFX_OP_PIPE_CONTROL(4));
337 	intel_ring_emit(ring, flags);
338 	intel_ring_emit(ring, scratch_addr | PIPE_CONTROL_GLOBAL_GTT);
339 	intel_ring_emit(ring, 0);
340 	intel_ring_advance(ring);
341 
342 	return 0;
343 }
344 
/*
 * Write a new tail value to the ring's tail register.
 */
static void
ring_write_tail(struct intel_ring_buffer *ring, u32 value)
{
	/*
	 * dev_priv is not referenced directly below; presumably the
	 * I915_WRITE_TAIL() macro expands to use it (confirm in i915_drv.h).
	 */
	drm_i915_private_t *dev_priv = ring->dev->dev_private;

	I915_WRITE_TAIL(ring, value);
}
351 
intel_ring_get_active_head(struct intel_ring_buffer * ring)352 u32 intel_ring_get_active_head(struct intel_ring_buffer *ring)
353 {
354 	drm_i915_private_t *dev_priv = ring->dev->dev_private;
355 	u32 acthd_reg = INTEL_INFO(ring->dev)->gen >= 4 ?
356 			RING_ACTHD(ring->mmio_base) : ACTHD;
357 
358 	return I915_READ(acthd_reg);
359 }
360 
init_ring_common(struct intel_ring_buffer * ring)361 static int init_ring_common(struct intel_ring_buffer *ring)
362 {
363 	struct drm_device *dev = ring->dev;
364 	drm_i915_private_t *dev_priv = dev->dev_private;
365 	struct drm_i915_gem_object *obj = ring->obj;
366 	int ret = 0;
367 	u32 head;
368 
369 	if (HAS_FORCE_WAKE(dev))
370 		gen6_gt_force_wake_get(dev_priv);
371 
372 	/* Stop the ring if it's running. */
373 	I915_WRITE_CTL(ring, 0);
374 	I915_WRITE_HEAD(ring, 0);
375 	ring->write_tail(ring, 0);
376 
377 	head = I915_READ_HEAD(ring) & HEAD_ADDR;
378 
379 	/* G45 ring initialization fails to reset head to zero */
380 	if (head != 0) {
381 		DRM_DEBUG_KMS("%s head not reset to zero "
382 			      "ctl %08x head %08x tail %08x start %08x\n",
383 			      ring->name,
384 			      I915_READ_CTL(ring),
385 			      I915_READ_HEAD(ring),
386 			      I915_READ_TAIL(ring),
387 			      I915_READ_START(ring));
388 
389 		I915_WRITE_HEAD(ring, 0);
390 
391 		if (I915_READ_HEAD(ring) & HEAD_ADDR) {
392 			DRM_ERROR("failed to set %s head to zero "
393 				  "ctl %08x head %08x tail %08x start %08x\n",
394 				  ring->name,
395 				  I915_READ_CTL(ring),
396 				  I915_READ_HEAD(ring),
397 				  I915_READ_TAIL(ring),
398 				  I915_READ_START(ring));
399 		}
400 	}
401 
402 	/* Initialize the ring. This must happen _after_ we've cleared the ring
403 	 * registers with the above sequence (the readback of the HEAD registers
404 	 * also enforces ordering), otherwise the hw might lose the new ring
405 	 * register values. */
406 	I915_WRITE_START(ring, obj->gtt_offset);
407 	I915_WRITE_CTL(ring,
408 			((ring->size - PAGE_SIZE) & RING_NR_PAGES)
409 			| RING_VALID);
410 
411 	/* If the head is still not zero, the ring is dead */
412 	if (wait_for((I915_READ_CTL(ring) & RING_VALID) != 0 &&
413 		     I915_READ_START(ring) == obj->gtt_offset &&
414 		     (I915_READ_HEAD(ring) & HEAD_ADDR) == 0, 50)) {
415 		DRM_ERROR("%s initialization failed "
416 				"ctl %08x head %08x tail %08x start %08x\n",
417 				ring->name,
418 				I915_READ_CTL(ring),
419 				I915_READ_HEAD(ring),
420 				I915_READ_TAIL(ring),
421 				I915_READ_START(ring));
422 		ret = -EIO;
423 		goto out;
424 	}
425 
426 	if (!drm_core_check_feature(ring->dev, DRIVER_MODESET))
427 		i915_kernel_lost_context(ring->dev);
428 	else {
429 		ring->head = I915_READ_HEAD(ring);
430 		ring->tail = I915_READ_TAIL(ring) & TAIL_ADDR;
431 		ring->space = ring_space(ring);
432 		ring->last_retired_head = -1;
433 	}
434 
435 out:
436 	if (HAS_FORCE_WAKE(dev))
437 		gen6_gt_force_wake_put(dev_priv);
438 
439 	return ret;
440 }
441 
442 static int
init_pipe_control(struct intel_ring_buffer * ring)443 init_pipe_control(struct intel_ring_buffer *ring)
444 {
445 	struct pipe_control *pc;
446 	struct drm_i915_gem_object *obj;
447 	int ret;
448 
449 	if (ring->private)
450 		return 0;
451 
452 	pc = malloc(sizeof(*pc), DRM_I915_GEM, M_WAITOK);
453 	if (!pc)
454 		return -ENOMEM;
455 
456 	obj = i915_gem_alloc_object(ring->dev, 4096);
457 	if (obj == NULL) {
458 		DRM_ERROR("Failed to allocate seqno page\n");
459 		ret = -ENOMEM;
460 		goto err;
461 	}
462 
463 	i915_gem_object_set_cache_level(obj, I915_CACHE_LLC);
464 
465 	ret = i915_gem_object_pin(obj, 4096, true, false);
466 	if (ret)
467 		goto err_unref;
468 
469 	pc->gtt_offset = obj->gtt_offset;
470 	pc->cpu_page = (uint32_t *)kva_alloc(PAGE_SIZE);
471 	if (pc->cpu_page == NULL)
472 		goto err_unpin;
473 	pmap_qenter((uintptr_t)pc->cpu_page, &obj->pages[0], 1);
474 	pmap_invalidate_cache_range((vm_offset_t)pc->cpu_page,
475 	    (vm_offset_t)pc->cpu_page + PAGE_SIZE, FALSE);
476 
477 	pc->obj = obj;
478 	ring->private = pc;
479 	return 0;
480 
481 err_unpin:
482 	i915_gem_object_unpin(obj);
483 err_unref:
484 	drm_gem_object_unreference(&obj->base);
485 err:
486 	free(pc, DRM_I915_GEM);
487 	return ret;
488 }
489 
490 static void
cleanup_pipe_control(struct intel_ring_buffer * ring)491 cleanup_pipe_control(struct intel_ring_buffer *ring)
492 {
493 	struct pipe_control *pc = ring->private;
494 	struct drm_i915_gem_object *obj;
495 
496 	if (!ring->private)
497 		return;
498 
499 	obj = pc->obj;
500 
501 	pmap_qremove((vm_offset_t)pc->cpu_page, 1);
502 	kva_free((uintptr_t)pc->cpu_page, PAGE_SIZE);
503 	i915_gem_object_unpin(obj);
504 	drm_gem_object_unreference(&obj->base);
505 
506 	free(pc, DRM_I915_GEM);
507 	ring->private = NULL;
508 }
509 
init_render_ring(struct intel_ring_buffer * ring)510 static int init_render_ring(struct intel_ring_buffer *ring)
511 {
512 	struct drm_device *dev = ring->dev;
513 	struct drm_i915_private *dev_priv = dev->dev_private;
514 	int ret = init_ring_common(ring);
515 
516 	if (INTEL_INFO(dev)->gen > 3)
517 		I915_WRITE(MI_MODE, _MASKED_BIT_ENABLE(VS_TIMER_DISPATCH));
518 
519 	/* We need to disable the AsyncFlip performance optimisations in order
520 	 * to use MI_WAIT_FOR_EVENT within the CS. It should already be
521 	 * programmed to '1' on all products.
522 	 */
523 	if (INTEL_INFO(dev)->gen >= 6)
524 		I915_WRITE(MI_MODE, _MASKED_BIT_ENABLE(ASYNC_FLIP_PERF_DISABLE));
525 
526 	/* Required for the hardware to program scanline values for waiting */
527 	if (INTEL_INFO(dev)->gen == 6)
528 		I915_WRITE(GFX_MODE,
529 			   _MASKED_BIT_ENABLE(GFX_TLB_INVALIDATE_ALWAYS));
530 
531 	if (IS_GEN7(dev))
532 		I915_WRITE(GFX_MODE_GEN7,
533 			   _MASKED_BIT_DISABLE(GFX_TLB_INVALIDATE_ALWAYS) |
534 			   _MASKED_BIT_ENABLE(GFX_REPLAY_MODE));
535 
536 	if (INTEL_INFO(dev)->gen >= 5) {
537 		ret = init_pipe_control(ring);
538 		if (ret)
539 			return ret;
540 	}
541 
542 	if (IS_GEN6(dev)) {
543 		/* From the Sandybridge PRM, volume 1 part 3, page 24:
544 		 * "If this bit is set, STCunit will have LRA as replacement
545 		 *  policy. [...] This bit must be reset.  LRA replacement
546 		 *  policy is not supported."
547 		 */
548 		I915_WRITE(CACHE_MODE_0,
549 			   _MASKED_BIT_DISABLE(CM0_STC_EVICT_DISABLE_LRA_SNB));
550 
551 		/* This is not explicitly set for GEN6, so read the register.
552 		 * see intel_ring_mi_set_context() for why we care.
553 		 * TODO: consider explicitly setting the bit for GEN5
554 		 */
555 		ring->itlb_before_ctx_switch =
556 			!!(I915_READ(GFX_MODE) & GFX_TLB_INVALIDATE_ALWAYS);
557 	}
558 
559 	if (INTEL_INFO(dev)->gen >= 6)
560 		I915_WRITE(INSTPM, _MASKED_BIT_ENABLE(INSTPM_FORCE_ORDERING));
561 
562 	if (HAS_L3_GPU_CACHE(dev))
563 		I915_WRITE_IMR(ring, ~GEN6_RENDER_L3_PARITY_ERROR);
564 
565 	return ret;
566 }
567 
render_ring_cleanup(struct intel_ring_buffer * ring)568 static void render_ring_cleanup(struct intel_ring_buffer *ring)
569 {
570 	struct drm_device *dev = ring->dev;
571 
572 	if (!ring->private)
573 		return;
574 
575 	if (HAS_BROKEN_CS_TLB(dev))
576 		drm_gem_object_unreference(to_gem_object(ring->private));
577 
578 	cleanup_pipe_control(ring);
579 }
580 
581 static void
update_mboxes(struct intel_ring_buffer * ring,u32 mmio_offset)582 update_mboxes(struct intel_ring_buffer *ring,
583 	      u32 mmio_offset)
584 {
585 	intel_ring_emit(ring, MI_LOAD_REGISTER_IMM(1));
586 	intel_ring_emit(ring, mmio_offset);
587 	intel_ring_emit(ring, ring->outstanding_lazy_request);
588 }
589 
590 /**
591  * gen6_add_request - Update the semaphore mailbox registers
592  *
593  * @ring - ring that is adding a request
594  * @seqno - return seqno stuck into the ring
595  *
596  * Update the mailbox registers in the *other* rings with the current seqno.
597  * This acts like a signal in the canonical semaphore.
598  */
599 static int
gen6_add_request(struct intel_ring_buffer * ring)600 gen6_add_request(struct intel_ring_buffer *ring)
601 {
602 	u32 mbox1_reg;
603 	u32 mbox2_reg;
604 	int ret;
605 
606 	ret = intel_ring_begin(ring, 10);
607 	if (ret)
608 		return ret;
609 
610 	mbox1_reg = ring->signal_mbox[0];
611 	mbox2_reg = ring->signal_mbox[1];
612 
613 	update_mboxes(ring, mbox1_reg);
614 	update_mboxes(ring, mbox2_reg);
615 	intel_ring_emit(ring, MI_STORE_DWORD_INDEX);
616 	intel_ring_emit(ring, I915_GEM_HWS_INDEX << MI_STORE_DWORD_INDEX_SHIFT);
617 	intel_ring_emit(ring, ring->outstanding_lazy_request);
618 	intel_ring_emit(ring, MI_USER_INTERRUPT);
619 	intel_ring_advance(ring);
620 
621 	return 0;
622 }
623 
624 /**
625  * intel_ring_sync - sync the waiter to the signaller on seqno
626  *
627  * @waiter - ring that is waiting
628  * @signaller - ring which has, or will signal
629  * @seqno - seqno which the waiter will block on
630  */
631 static int
gen6_ring_sync(struct intel_ring_buffer * waiter,struct intel_ring_buffer * signaller,u32 seqno)632 gen6_ring_sync(struct intel_ring_buffer *waiter,
633 	       struct intel_ring_buffer *signaller,
634 	       u32 seqno)
635 {
636 	int ret;
637 	u32 dw1 = MI_SEMAPHORE_MBOX |
638 		  MI_SEMAPHORE_COMPARE |
639 		  MI_SEMAPHORE_REGISTER;
640 
641 	/* Throughout all of the GEM code, seqno passed implies our current
642 	 * seqno is >= the last seqno executed. However for hardware the
643 	 * comparison is strictly greater than.
644 	 */
645 	seqno -= 1;
646 
647 	WARN_ON(signaller->semaphore_register[waiter->id] ==
648 		MI_SEMAPHORE_SYNC_INVALID);
649 
650 	ret = intel_ring_begin(waiter, 4);
651 	if (ret)
652 		return ret;
653 
654 	intel_ring_emit(waiter,
655 			dw1 | signaller->semaphore_register[waiter->id]);
656 	intel_ring_emit(waiter, seqno);
657 	intel_ring_emit(waiter, 0);
658 	intel_ring_emit(waiter, MI_NOOP);
659 	intel_ring_advance(waiter);
660 
661 	return 0;
662 }
663 
/*
 * Emit four dwords on ring__: a PIPE_CONTROL header with
 * PIPE_CONTROL_QW_WRITE and PIPE_CONTROL_DEPTH_STALL set, the target
 * address addr__ tagged PIPE_CONTROL_GLOBAL_GTT, and two zero dwords.
 * The caller must already have reserved the ring space.
 */
#define PIPE_CONTROL_FLUSH(ring__, addr__)				\
do {									\
	intel_ring_emit(ring__,						\
	    GFX_OP_PIPE_CONTROL(4) | PIPE_CONTROL_QW_WRITE |		\
	    PIPE_CONTROL_DEPTH_STALL);					\
	intel_ring_emit(ring__, (addr__) | PIPE_CONTROL_GLOBAL_GTT);	\
	intel_ring_emit(ring__, 0);					\
	intel_ring_emit(ring__, 0);					\
} while (0)
672 
673 static int
pc_render_add_request(struct intel_ring_buffer * ring)674 pc_render_add_request(struct intel_ring_buffer *ring)
675 {
676 	struct pipe_control *pc = ring->private;
677 	u32 scratch_addr = pc->gtt_offset + 128;
678 	int ret;
679 
680 	/* For Ironlake, MI_USER_INTERRUPT was deprecated and apparently
681 	 * incoherent with writes to memory, i.e. completely fubar,
682 	 * so we need to use PIPE_NOTIFY instead.
683 	 *
684 	 * However, we also need to workaround the qword write
685 	 * incoherence by flushing the 6 PIPE_NOTIFY buffers out to
686 	 * memory before requesting an interrupt.
687 	 */
688 	ret = intel_ring_begin(ring, 32);
689 	if (ret)
690 		return ret;
691 
692 	intel_ring_emit(ring, GFX_OP_PIPE_CONTROL(4) | PIPE_CONTROL_QW_WRITE |
693 			PIPE_CONTROL_WRITE_FLUSH |
694 			PIPE_CONTROL_TEXTURE_CACHE_INVALIDATE);
695 	intel_ring_emit(ring, pc->gtt_offset | PIPE_CONTROL_GLOBAL_GTT);
696 	intel_ring_emit(ring, ring->outstanding_lazy_request);
697 	intel_ring_emit(ring, 0);
698 	PIPE_CONTROL_FLUSH(ring, scratch_addr);
699 	scratch_addr += 128; /* write to separate cachelines */
700 	PIPE_CONTROL_FLUSH(ring, scratch_addr);
701 	scratch_addr += 128;
702 	PIPE_CONTROL_FLUSH(ring, scratch_addr);
703 	scratch_addr += 128;
704 	PIPE_CONTROL_FLUSH(ring, scratch_addr);
705 	scratch_addr += 128;
706 	PIPE_CONTROL_FLUSH(ring, scratch_addr);
707 	scratch_addr += 128;
708 	PIPE_CONTROL_FLUSH(ring, scratch_addr);
709 
710 	intel_ring_emit(ring, GFX_OP_PIPE_CONTROL(4) | PIPE_CONTROL_QW_WRITE |
711 			PIPE_CONTROL_WRITE_FLUSH |
712 			PIPE_CONTROL_TEXTURE_CACHE_INVALIDATE |
713 			PIPE_CONTROL_NOTIFY);
714 	intel_ring_emit(ring, pc->gtt_offset | PIPE_CONTROL_GLOBAL_GTT);
715 	intel_ring_emit(ring, ring->outstanding_lazy_request);
716 	intel_ring_emit(ring, 0);
717 	intel_ring_advance(ring);
718 
719 	return 0;
720 }
721 
722 static u32
gen6_ring_get_seqno(struct intel_ring_buffer * ring,bool lazy_coherency)723 gen6_ring_get_seqno(struct intel_ring_buffer *ring, bool lazy_coherency)
724 {
725 	/* Workaround to force correct ordering between irq and seqno writes on
726 	 * ivb (and maybe also on snb) by reading from a CS register (like
727 	 * ACTHD) before reading the status page. */
728 	if (!lazy_coherency)
729 		intel_ring_get_active_head(ring);
730 	return intel_read_status_page(ring, I915_GEM_HWS_INDEX);
731 }
732 
733 static u32
ring_get_seqno(struct intel_ring_buffer * ring,bool lazy_coherency)734 ring_get_seqno(struct intel_ring_buffer *ring, bool lazy_coherency)
735 {
736 	return intel_read_status_page(ring, I915_GEM_HWS_INDEX);
737 }
738 
739 static u32
pc_render_get_seqno(struct intel_ring_buffer * ring,bool lazy_coherency)740 pc_render_get_seqno(struct intel_ring_buffer *ring, bool lazy_coherency)
741 {
742 	struct pipe_control *pc = ring->private;
743 	return pc->cpu_page[0];
744 }
745 
746 static bool
gen5_ring_get_irq(struct intel_ring_buffer * ring)747 gen5_ring_get_irq(struct intel_ring_buffer *ring)
748 {
749 	struct drm_device *dev = ring->dev;
750 	drm_i915_private_t *dev_priv = dev->dev_private;
751 
752 	if (!dev->irq_enabled)
753 		return false;
754 
755 	mtx_lock(&dev_priv->irq_lock);
756 	if (ring->irq_refcount++ == 0) {
757 		dev_priv->gt_irq_mask &= ~ring->irq_enable_mask;
758 		I915_WRITE(GTIMR, dev_priv->gt_irq_mask);
759 		POSTING_READ(GTIMR);
760 	}
761 	mtx_unlock(&dev_priv->irq_lock);
762 
763 	return true;
764 }
765 
766 static void
gen5_ring_put_irq(struct intel_ring_buffer * ring)767 gen5_ring_put_irq(struct intel_ring_buffer *ring)
768 {
769 	struct drm_device *dev = ring->dev;
770 	drm_i915_private_t *dev_priv = dev->dev_private;
771 
772 	mtx_lock(&dev_priv->irq_lock);
773 	if (--ring->irq_refcount == 0) {
774 		dev_priv->gt_irq_mask |= ring->irq_enable_mask;
775 		I915_WRITE(GTIMR, dev_priv->gt_irq_mask);
776 		POSTING_READ(GTIMR);
777 	}
778 	mtx_unlock(&dev_priv->irq_lock);
779 }
780 
781 static bool
i9xx_ring_get_irq(struct intel_ring_buffer * ring)782 i9xx_ring_get_irq(struct intel_ring_buffer *ring)
783 {
784 	struct drm_device *dev = ring->dev;
785 	drm_i915_private_t *dev_priv = dev->dev_private;
786 
787 	if (!dev->irq_enabled)
788 		return false;
789 
790 	mtx_lock(&dev_priv->irq_lock);
791 	if (ring->irq_refcount++ == 0) {
792 		dev_priv->irq_mask &= ~ring->irq_enable_mask;
793 		I915_WRITE(IMR, dev_priv->irq_mask);
794 		POSTING_READ(IMR);
795 	}
796 	mtx_unlock(&dev_priv->irq_lock);
797 
798 	return true;
799 }
800 
801 static void
i9xx_ring_put_irq(struct intel_ring_buffer * ring)802 i9xx_ring_put_irq(struct intel_ring_buffer *ring)
803 {
804 	struct drm_device *dev = ring->dev;
805 	drm_i915_private_t *dev_priv = dev->dev_private;
806 
807 	mtx_lock(&dev_priv->irq_lock);
808 	if (--ring->irq_refcount == 0) {
809 		dev_priv->irq_mask |= ring->irq_enable_mask;
810 		I915_WRITE(IMR, dev_priv->irq_mask);
811 		POSTING_READ(IMR);
812 	}
813 	mtx_unlock(&dev_priv->irq_lock);
814 }
815 
816 static bool
i8xx_ring_get_irq(struct intel_ring_buffer * ring)817 i8xx_ring_get_irq(struct intel_ring_buffer *ring)
818 {
819 	struct drm_device *dev = ring->dev;
820 	drm_i915_private_t *dev_priv = dev->dev_private;
821 
822 	if (!dev->irq_enabled)
823 		return false;
824 
825 	mtx_lock(&dev_priv->irq_lock);
826 	if (ring->irq_refcount++ == 0) {
827 		dev_priv->irq_mask &= ~ring->irq_enable_mask;
828 		I915_WRITE16(IMR, dev_priv->irq_mask);
829 		POSTING_READ16(IMR);
830 	}
831 	mtx_unlock(&dev_priv->irq_lock);
832 
833 	return true;
834 }
835 
836 static void
i8xx_ring_put_irq(struct intel_ring_buffer * ring)837 i8xx_ring_put_irq(struct intel_ring_buffer *ring)
838 {
839 	struct drm_device *dev = ring->dev;
840 	drm_i915_private_t *dev_priv = dev->dev_private;
841 
842 	mtx_lock(&dev_priv->irq_lock);
843 	if (--ring->irq_refcount == 0) {
844 		dev_priv->irq_mask |= ring->irq_enable_mask;
845 		I915_WRITE16(IMR, dev_priv->irq_mask);
846 		POSTING_READ16(IMR);
847 	}
848 	mtx_unlock(&dev_priv->irq_lock);
849 }
850 
intel_ring_setup_status_page(struct intel_ring_buffer * ring)851 void intel_ring_setup_status_page(struct intel_ring_buffer *ring)
852 {
853 	struct drm_device *dev = ring->dev;
854 	drm_i915_private_t *dev_priv = ring->dev->dev_private;
855 	u32 mmio = 0;
856 
857 	/* The ring status page addresses are no longer next to the rest of
858 	 * the ring registers as of gen7.
859 	 */
860 	if (IS_GEN7(dev)) {
861 		switch (ring->id) {
862 		case RCS:
863 			mmio = RENDER_HWS_PGA_GEN7;
864 			break;
865 		case BCS:
866 			mmio = BLT_HWS_PGA_GEN7;
867 			break;
868 		case VCS:
869 			mmio = BSD_HWS_PGA_GEN7;
870 			break;
871 		}
872 	} else if (IS_GEN6(ring->dev)) {
873 		mmio = RING_HWS_PGA_GEN6(ring->mmio_base);
874 	} else {
875 		mmio = RING_HWS_PGA(ring->mmio_base);
876 	}
877 
878 	I915_WRITE(mmio, (u32)ring->status_page.gfx_addr);
879 	POSTING_READ(mmio);
880 }
881 
882 static int
bsd_ring_flush(struct intel_ring_buffer * ring,u32 invalidate_domains,u32 flush_domains)883 bsd_ring_flush(struct intel_ring_buffer *ring,
884 	       u32     invalidate_domains,
885 	       u32     flush_domains)
886 {
887 	int ret;
888 
889 	ret = intel_ring_begin(ring, 2);
890 	if (ret)
891 		return ret;
892 
893 	intel_ring_emit(ring, MI_FLUSH);
894 	intel_ring_emit(ring, MI_NOOP);
895 	intel_ring_advance(ring);
896 	return 0;
897 }
898 
899 static int
i9xx_add_request(struct intel_ring_buffer * ring)900 i9xx_add_request(struct intel_ring_buffer *ring)
901 {
902 	int ret;
903 
904 	ret = intel_ring_begin(ring, 4);
905 	if (ret)
906 		return ret;
907 
908 	intel_ring_emit(ring, MI_STORE_DWORD_INDEX);
909 	intel_ring_emit(ring, I915_GEM_HWS_INDEX << MI_STORE_DWORD_INDEX_SHIFT);
910 	intel_ring_emit(ring, ring->outstanding_lazy_request);
911 	intel_ring_emit(ring, MI_USER_INTERRUPT);
912 	intel_ring_advance(ring);
913 
914 	return 0;
915 }
916 
917 static bool
gen6_ring_get_irq(struct intel_ring_buffer * ring)918 gen6_ring_get_irq(struct intel_ring_buffer *ring)
919 {
920 	struct drm_device *dev = ring->dev;
921 	drm_i915_private_t *dev_priv = dev->dev_private;
922 
923 	if (!dev->irq_enabled)
924 	       return false;
925 
926 	/* It looks like we need to prevent the gt from suspending while waiting
927 	 * for an notifiy irq, otherwise irqs seem to get lost on at least the
928 	 * blt/bsd rings on ivb. */
929 	gen6_gt_force_wake_get(dev_priv);
930 
931 	mtx_lock(&dev_priv->irq_lock);
932 	if (ring->irq_refcount++ == 0) {
933 		if (HAS_L3_GPU_CACHE(dev) && ring->id == RCS)
934 			I915_WRITE_IMR(ring, ~(ring->irq_enable_mask |
935 						GEN6_RENDER_L3_PARITY_ERROR));
936 		else
937 			I915_WRITE_IMR(ring, ~ring->irq_enable_mask);
938 		dev_priv->gt_irq_mask &= ~ring->irq_enable_mask;
939 		I915_WRITE(GTIMR, dev_priv->gt_irq_mask);
940 		POSTING_READ(GTIMR);
941 	}
942 	mtx_unlock(&dev_priv->irq_lock);
943 
944 	return true;
945 }
946 
947 static void
gen6_ring_put_irq(struct intel_ring_buffer * ring)948 gen6_ring_put_irq(struct intel_ring_buffer *ring)
949 {
950 	struct drm_device *dev = ring->dev;
951 	drm_i915_private_t *dev_priv = dev->dev_private;
952 
953 	mtx_lock(&dev_priv->irq_lock);
954 	if (--ring->irq_refcount == 0) {
955 		if (HAS_L3_GPU_CACHE(dev) && ring->id == RCS)
956 			I915_WRITE_IMR(ring, ~GEN6_RENDER_L3_PARITY_ERROR);
957 		else
958 			I915_WRITE_IMR(ring, ~0);
959 		dev_priv->gt_irq_mask |= ring->irq_enable_mask;
960 		I915_WRITE(GTIMR, dev_priv->gt_irq_mask);
961 		POSTING_READ(GTIMR);
962 	}
963 	mtx_unlock(&dev_priv->irq_lock);
964 
965 	gen6_gt_force_wake_put(dev_priv);
966 }
967 
968 static int
i965_dispatch_execbuffer(struct intel_ring_buffer * ring,u32 offset,u32 length,unsigned flags)969 i965_dispatch_execbuffer(struct intel_ring_buffer *ring,
970 			 u32 offset, u32 length,
971 			 unsigned flags)
972 {
973 	int ret;
974 
975 	ret = intel_ring_begin(ring, 2);
976 	if (ret)
977 		return ret;
978 
979 	intel_ring_emit(ring,
980 			MI_BATCH_BUFFER_START |
981 			MI_BATCH_GTT |
982 			(flags & I915_DISPATCH_SECURE ? 0 : MI_BATCH_NON_SECURE_I965));
983 	intel_ring_emit(ring, offset);
984 	intel_ring_advance(ring);
985 
986 	return 0;
987 }
988 
989 /* Just userspace ABI convention to limit the wa batch bo to a resonable size */
990 #define I830_BATCH_LIMIT (256*1024)
991 static int
i830_dispatch_execbuffer(struct intel_ring_buffer * ring,u32 offset,u32 len,unsigned flags)992 i830_dispatch_execbuffer(struct intel_ring_buffer *ring,
993 				u32 offset, u32 len,
994 				unsigned flags)
995 {
996 	int ret;
997 
998 	if (flags & I915_DISPATCH_PINNED) {
999 		ret = intel_ring_begin(ring, 4);
1000 		if (ret)
1001 			return ret;
1002 
1003 		intel_ring_emit(ring, MI_BATCH_BUFFER);
1004 		intel_ring_emit(ring, offset | (flags & I915_DISPATCH_SECURE ? 0 : MI_BATCH_NON_SECURE));
1005 		intel_ring_emit(ring, offset + len - 8);
1006 		intel_ring_emit(ring, MI_NOOP);
1007 		intel_ring_advance(ring);
1008 	} else {
1009 		struct drm_i915_gem_object *obj = ring->private;
1010 		u32 cs_offset = obj->gtt_offset;
1011 
1012 		if (len > I830_BATCH_LIMIT)
1013 			return -ENOSPC;
1014 
1015 		ret = intel_ring_begin(ring, 9+3);
1016 		if (ret)
1017 			return ret;
1018 		/* Blit the batch (which has now all relocs applied) to the stable batch
1019 		 * scratch bo area (so that the CS never stumbles over its tlb
1020 		 * invalidation bug) ... */
1021 		intel_ring_emit(ring, XY_SRC_COPY_BLT_CMD |
1022 				XY_SRC_COPY_BLT_WRITE_ALPHA |
1023 				XY_SRC_COPY_BLT_WRITE_RGB);
1024 		intel_ring_emit(ring, BLT_DEPTH_32 | BLT_ROP_GXCOPY | 4096);
1025 		intel_ring_emit(ring, 0);
1026 		intel_ring_emit(ring, (DIV_ROUND_UP(len, 4096) << 16) | 1024);
1027 		intel_ring_emit(ring, cs_offset);
1028 		intel_ring_emit(ring, 0);
1029 		intel_ring_emit(ring, 4096);
1030 		intel_ring_emit(ring, offset);
1031 		intel_ring_emit(ring, MI_FLUSH);
1032 
1033 		/* ... and execute it. */
1034 		intel_ring_emit(ring, MI_BATCH_BUFFER);
1035 		intel_ring_emit(ring, cs_offset | (flags & I915_DISPATCH_SECURE ? 0 : MI_BATCH_NON_SECURE));
1036 		intel_ring_emit(ring, cs_offset + len - 8);
1037 		intel_ring_advance(ring);
1038 	}
1039 
1040 	return 0;
1041 }
1042 
1043 static int
i915_dispatch_execbuffer(struct intel_ring_buffer * ring,u32 offset,u32 len,unsigned flags)1044 i915_dispatch_execbuffer(struct intel_ring_buffer *ring,
1045 			 u32 offset, u32 len,
1046 			 unsigned flags)
1047 {
1048 	int ret;
1049 
1050 	ret = intel_ring_begin(ring, 2);
1051 	if (ret)
1052 		return ret;
1053 
1054 	intel_ring_emit(ring, MI_BATCH_BUFFER_START | MI_BATCH_GTT);
1055 	intel_ring_emit(ring, offset | (flags & I915_DISPATCH_SECURE ? 0 : MI_BATCH_NON_SECURE));
1056 	intel_ring_advance(ring);
1057 
1058 	return 0;
1059 }
1060 
cleanup_status_page(struct intel_ring_buffer * ring)1061 static void cleanup_status_page(struct intel_ring_buffer *ring)
1062 {
1063 	struct drm_i915_gem_object *obj;
1064 
1065 	obj = ring->status_page.obj;
1066 	if (obj == NULL)
1067 		return;
1068 
1069 	pmap_qremove((vm_offset_t)ring->status_page.page_addr, 1);
1070 	kva_free((vm_offset_t)ring->status_page.page_addr,
1071 	    PAGE_SIZE);
1072 	i915_gem_object_unpin(obj);
1073 	drm_gem_object_unreference(&obj->base);
1074 	ring->status_page.obj = NULL;
1075 }
1076 
init_status_page(struct intel_ring_buffer * ring)1077 static int init_status_page(struct intel_ring_buffer *ring)
1078 {
1079 	struct drm_device *dev = ring->dev;
1080 	struct drm_i915_gem_object *obj;
1081 	int ret;
1082 
1083 	obj = i915_gem_alloc_object(dev, 4096);
1084 	if (obj == NULL) {
1085 		DRM_ERROR("Failed to allocate status page\n");
1086 		ret = -ENOMEM;
1087 		goto err;
1088 	}
1089 
1090 	i915_gem_object_set_cache_level(obj, I915_CACHE_LLC);
1091 
1092 	ret = i915_gem_object_pin(obj, 4096, true, false);
1093 	if (ret != 0) {
1094 		goto err_unref;
1095 	}
1096 
1097 	ring->status_page.gfx_addr = obj->gtt_offset;
1098 	ring->status_page.page_addr = (void *)kva_alloc(PAGE_SIZE);
1099 	if (ring->status_page.page_addr == NULL) {
1100 		ret = -ENOMEM;
1101 		goto err_unpin;
1102 	}
1103 	pmap_qenter((vm_offset_t)ring->status_page.page_addr, &obj->pages[0],
1104 	    1);
1105 	pmap_invalidate_cache_range((vm_offset_t)ring->status_page.page_addr,
1106 	    (vm_offset_t)ring->status_page.page_addr + PAGE_SIZE, FALSE);
1107 	ring->status_page.obj = obj;
1108 	memset(ring->status_page.page_addr, 0, PAGE_SIZE);
1109 
1110 	intel_ring_setup_status_page(ring);
1111 	DRM_DEBUG_DRIVER("%s hws offset: 0x%08x\n",
1112 			ring->name, ring->status_page.gfx_addr);
1113 
1114 	return 0;
1115 
1116 err_unpin:
1117 	i915_gem_object_unpin(obj);
1118 err_unref:
1119 	drm_gem_object_unreference(&obj->base);
1120 err:
1121 	return ret;
1122 }
1123 
init_phys_hws_pga(struct intel_ring_buffer * ring)1124 static int init_phys_hws_pga(struct intel_ring_buffer *ring)
1125 {
1126 	struct drm_i915_private *dev_priv = ring->dev->dev_private;
1127 	u32 addr;
1128 
1129 	if (!dev_priv->status_page_dmah) {
1130 		dev_priv->status_page_dmah =
1131 			drm_pci_alloc(ring->dev, PAGE_SIZE, PAGE_SIZE, BUS_SPACE_MAXADDR);
1132 		if (!dev_priv->status_page_dmah)
1133 			return -ENOMEM;
1134 	}
1135 
1136 	addr = dev_priv->status_page_dmah->busaddr;
1137 	if (INTEL_INFO(ring->dev)->gen >= 4)
1138 		addr |= (dev_priv->status_page_dmah->busaddr >> 28) & 0xf0;
1139 	I915_WRITE(HWS_PGA, addr);
1140 
1141 	ring->status_page.page_addr = dev_priv->status_page_dmah->vaddr;
1142 	memset(ring->status_page.page_addr, 0, PAGE_SIZE);
1143 
1144 	return 0;
1145 }
1146 
intel_init_ring_buffer(struct drm_device * dev,struct intel_ring_buffer * ring)1147 static int intel_init_ring_buffer(struct drm_device *dev,
1148 				  struct intel_ring_buffer *ring)
1149 {
1150 	struct drm_i915_gem_object *obj;
1151 	struct drm_i915_private *dev_priv = dev->dev_private;
1152 	int ret;
1153 
1154 	ring->dev = dev;
1155 	INIT_LIST_HEAD(&ring->active_list);
1156 	INIT_LIST_HEAD(&ring->request_list);
1157 	ring->size = 32 * PAGE_SIZE;
1158 	memset(ring->sync_seqno, 0, sizeof(ring->sync_seqno));
1159 
1160 #ifdef __linux__
1161 	init_waitqueue_head(&ring->irq_queue);
1162 #endif
1163 
1164 	if (I915_NEED_GFX_HWS(dev)) {
1165 		ret = init_status_page(ring);
1166 		if (ret)
1167 			return ret;
1168 	} else {
1169 		BUG_ON(ring->id != RCS);
1170 		ret = init_phys_hws_pga(ring);
1171 		if (ret)
1172 			return ret;
1173 	}
1174 
1175 	obj = i915_gem_alloc_object(dev, ring->size);
1176 	if (obj == NULL) {
1177 		DRM_ERROR("Failed to allocate ringbuffer\n");
1178 		ret = -ENOMEM;
1179 		goto err_hws;
1180 	}
1181 
1182 	ring->obj = obj;
1183 
1184 	ret = i915_gem_object_pin(obj, PAGE_SIZE, true, false);
1185 	if (ret)
1186 		goto err_unref;
1187 
1188 	ret = i915_gem_object_set_to_gtt_domain(obj, true);
1189 	if (ret)
1190 		goto err_unpin;
1191 
1192 	ring->virtual_start =
1193 		pmap_mapdev_attr(
1194 		    dev_priv->mm.gtt->gma_bus_addr + obj->gtt_offset, ring->size,
1195 		    VM_MEMATTR_WRITE_COMBINING);
1196 	if (ring->virtual_start == NULL) {
1197 		DRM_ERROR("Failed to map ringbuffer.\n");
1198 		ret = -EINVAL;
1199 		goto err_unpin;
1200 	}
1201 
1202 	ret = ring->init(ring);
1203 	if (ret)
1204 		goto err_unmap;
1205 
1206 	/* Workaround an erratum on the i830 which causes a hang if
1207 	 * the TAIL pointer points to within the last 2 cachelines
1208 	 * of the buffer.
1209 	 */
1210 	ring->effective_size = ring->size;
1211 	if (IS_I830(ring->dev) || IS_845G(ring->dev))
1212 		ring->effective_size -= 128;
1213 
1214 	return 0;
1215 
1216 err_unmap:
1217 	pmap_unmapdev((vm_offset_t)ring->virtual_start, ring->size);
1218 err_unpin:
1219 	i915_gem_object_unpin(obj);
1220 err_unref:
1221 	drm_gem_object_unreference(&obj->base);
1222 	ring->obj = NULL;
1223 err_hws:
1224 	cleanup_status_page(ring);
1225 	return ret;
1226 }
1227 
intel_cleanup_ring_buffer(struct intel_ring_buffer * ring)1228 void intel_cleanup_ring_buffer(struct intel_ring_buffer *ring)
1229 {
1230 	struct drm_i915_private *dev_priv;
1231 	int ret;
1232 
1233 	if (ring->obj == NULL)
1234 		return;
1235 
1236 	/* Disable the ring buffer. The ring must be idle at this point */
1237 	dev_priv = ring->dev->dev_private;
1238 	ret = intel_ring_idle(ring);
1239 	if (ret)
1240 		DRM_ERROR("failed to quiesce %s whilst cleaning up: %d\n",
1241 			  ring->name, ret);
1242 
1243 	I915_WRITE_CTL(ring, 0);
1244 
1245 	pmap_unmapdev((vm_offset_t)ring->virtual_start, ring->size);
1246 
1247 	i915_gem_object_unpin(ring->obj);
1248 	drm_gem_object_unreference(&ring->obj->base);
1249 	ring->obj = NULL;
1250 
1251 	if (ring->cleanup)
1252 		ring->cleanup(ring);
1253 
1254 	cleanup_status_page(ring);
1255 }
1256 
/*
 * Wait for 'seqno' on the ring and, if the wait succeeds, retire the ring's
 * completed requests.  Returns the result of i915_wait_seqno().
 */
static int intel_ring_wait_seqno(struct intel_ring_buffer *ring, u32 seqno)
{
	int ret = i915_wait_seqno(ring, seqno);

	if (ret == 0)
		i915_gem_retire_requests_ring(ring);

	return ret;
}
1267 
intel_ring_wait_request(struct intel_ring_buffer * ring,int n)1268 static int intel_ring_wait_request(struct intel_ring_buffer *ring, int n)
1269 {
1270 	struct drm_i915_gem_request *request;
1271 	u32 seqno = 0;
1272 	int ret;
1273 
1274 	i915_gem_retire_requests_ring(ring);
1275 
1276 	if (ring->last_retired_head != -1) {
1277 		ring->head = ring->last_retired_head;
1278 		ring->last_retired_head = -1;
1279 		ring->space = ring_space(ring);
1280 		if (ring->space >= n)
1281 			return 0;
1282 	}
1283 
1284 	list_for_each_entry(request, &ring->request_list, list) {
1285 		int space;
1286 
1287 		if (request->tail == -1)
1288 			continue;
1289 
1290 		space = request->tail - (ring->tail + I915_RING_FREE_SPACE);
1291 		if (space < 0)
1292 			space += ring->size;
1293 		if (space >= n) {
1294 			seqno = request->seqno;
1295 			break;
1296 		}
1297 
1298 		/* Consume this request in case we need more space than
1299 		 * is available and so need to prevent a race between
1300 		 * updating last_retired_head and direct reads of
1301 		 * I915_RING_HEAD. It also provides a nice sanity check.
1302 		 */
1303 		request->tail = -1;
1304 	}
1305 
1306 	if (seqno == 0)
1307 		return -ENOSPC;
1308 
1309 	ret = intel_ring_wait_seqno(ring, seqno);
1310 	if (ret)
1311 		return ret;
1312 
1313 	if (WARN_ON(ring->last_retired_head == -1))
1314 		return -ENOSPC;
1315 
1316 	ring->head = ring->last_retired_head;
1317 	ring->last_retired_head = -1;
1318 	ring->space = ring_space(ring);
1319 	if (WARN_ON(ring->space < n))
1320 		return -ENOSPC;
1321 
1322 	return 0;
1323 }
1324 
ring_wait_for_space(struct intel_ring_buffer * ring,int n)1325 static int ring_wait_for_space(struct intel_ring_buffer *ring, int n)
1326 {
1327 	struct drm_device *dev = ring->dev;
1328 	struct drm_i915_private *dev_priv = dev->dev_private;
1329 	unsigned long end;
1330 	int ret;
1331 
1332 	ret = intel_ring_wait_request(ring, n);
1333 	if (ret != -ENOSPC)
1334 		return ret;
1335 
1336 	CTR1(KTR_DRM, "ring_wait_begin %s", ring->name);
1337 	/* With GEM the hangcheck timer should kick us out of the loop,
1338 	 * leaving it early runs the risk of corrupting GEM state (due
1339 	 * to running on almost untested codepaths). But on resume
1340 	 * timers don't work yet, so prevent a complete hang in that
1341 	 * case by choosing an insanely large timeout. */
1342 	end = jiffies + 60 * HZ;
1343 
1344 	do {
1345 		ring->head = I915_READ_HEAD(ring);
1346 		ring->space = ring_space(ring);
1347 		if (ring->space >= n) {
1348 			CTR1(KTR_DRM, "ring_wait_end %s", ring->name);
1349 			return 0;
1350 		}
1351 
1352 		if (dev->primary->master) {
1353 			struct drm_i915_master_private *master_priv = dev->primary->master->driver_priv;
1354 			if (master_priv->sarea_priv)
1355 				master_priv->sarea_priv->perf_boxes |= I915_BOX_WAIT;
1356 		}
1357 
1358 		DRM_MSLEEP(1);
1359 
1360 		ret = i915_gem_check_wedge(dev_priv, dev_priv->mm.interruptible);
1361 		if (ret) {
1362 			CTR1(KTR_DRM, "ring_wait_end %s wedged", ring->name);
1363 			return ret;
1364 		}
1365 	} while (!time_after(jiffies, end));
1366 	CTR1(KTR_DRM, "ring_wait_end %s busy", ring->name);
1367 	return -EBUSY;
1368 }
1369 
intel_wrap_ring_buffer(struct intel_ring_buffer * ring)1370 static int intel_wrap_ring_buffer(struct intel_ring_buffer *ring)
1371 {
1372 	uint32_t __iomem *virt;
1373 	int rem = ring->size - ring->tail;
1374 
1375 	if (ring->space < rem) {
1376 		int ret = ring_wait_for_space(ring, rem);
1377 		if (ret)
1378 			return ret;
1379 	}
1380 
1381 	virt = (uint32_t *)((char *)ring->virtual_start + ring->tail);
1382 	rem /= 4;
1383 	while (rem--)
1384 		iowrite32(MI_NOOP, virt++);
1385 
1386 	ring->tail = 0;
1387 	ring->space = ring_space(ring);
1388 
1389 	return 0;
1390 }
1391 
intel_ring_idle(struct intel_ring_buffer * ring)1392 int intel_ring_idle(struct intel_ring_buffer *ring)
1393 {
1394 	u32 seqno;
1395 	int ret;
1396 
1397 	/* We need to add any requests required to flush the objects and ring */
1398 	if (ring->outstanding_lazy_request) {
1399 		ret = i915_add_request(ring, NULL, NULL);
1400 		if (ret)
1401 			return ret;
1402 	}
1403 
1404 	/* Wait upon the last request to be completed */
1405 	if (list_empty(&ring->request_list))
1406 		return 0;
1407 
1408 	seqno = list_entry(ring->request_list.prev,
1409 			   struct drm_i915_gem_request,
1410 			   list)->seqno;
1411 
1412 	return i915_wait_seqno(ring, seqno);
1413 }
1414 
1415 static int
intel_ring_alloc_seqno(struct intel_ring_buffer * ring)1416 intel_ring_alloc_seqno(struct intel_ring_buffer *ring)
1417 {
1418 	if (ring->outstanding_lazy_request)
1419 		return 0;
1420 
1421 	return i915_gem_get_seqno(ring->dev, &ring->outstanding_lazy_request);
1422 }
1423 
intel_ring_begin(struct intel_ring_buffer * ring,int num_dwords)1424 int intel_ring_begin(struct intel_ring_buffer *ring,
1425 		     int num_dwords)
1426 {
1427 	drm_i915_private_t *dev_priv = ring->dev->dev_private;
1428 	int n = 4*num_dwords;
1429 	int ret;
1430 
1431 	ret = i915_gem_check_wedge(dev_priv, dev_priv->mm.interruptible);
1432 	if (ret)
1433 		return ret;
1434 
1435 	/* Preallocate the olr before touching the ring */
1436 	ret = intel_ring_alloc_seqno(ring);
1437 	if (ret)
1438 		return ret;
1439 
1440 	if (unlikely(ring->tail + n > ring->effective_size)) {
1441 		ret = intel_wrap_ring_buffer(ring);
1442 		if (unlikely(ret))
1443 			return ret;
1444 	}
1445 
1446 	if (unlikely(ring->space < n)) {
1447 		ret = ring_wait_for_space(ring, n);
1448 		if (unlikely(ret))
1449 			return ret;
1450 	}
1451 
1452 	ring->space -= n;
1453 	return 0;
1454 }
1455 
intel_ring_advance(struct intel_ring_buffer * ring)1456 void intel_ring_advance(struct intel_ring_buffer *ring)
1457 {
1458 	struct drm_i915_private *dev_priv = ring->dev->dev_private;
1459 
1460 	ring->tail &= ring->size - 1;
1461 	if (dev_priv->stop_rings & intel_ring_flag(ring))
1462 		return;
1463 	ring->write_tail(ring, ring->tail);
1464 }
1465 
1466 
/*
 * Tail update for the gen6 BSD ring.  Every tail move must follow the
 * sequence below: block IDLE notifications, clear the context id, wait for
 * the ring to wake up, write the tail, then allow IDLE notifications again.
 */
static void gen6_bsd_ring_write_tail(struct intel_ring_buffer *ring,
				     u32 value)
{
	drm_i915_private_t *dev_priv = ring->dev->dev_private;
	int timed_out;

	/*
	 * Disable notification that the ring is IDLE. The GT
	 * will then assume that it is busy and bring it out of rc6.
	 */
	I915_WRITE(GEN6_BSD_SLEEP_PSMI_CONTROL,
		   _MASKED_BIT_ENABLE(GEN6_BSD_SLEEP_MSG_DISABLE));

	/* Clear the context id. Here be magic! */
	I915_WRITE64(GEN6_BSD_RNCID, 0x0);

	/* Wait for the ring not to be idle, i.e. for it to wake up. */
	timed_out = wait_for((I915_READ(GEN6_BSD_SLEEP_PSMI_CONTROL) &
			      GEN6_BSD_SLEEP_INDICATOR) == 0,
			     50);
	if (timed_out)
		DRM_ERROR("timed out waiting for the BSD ring to wake up\n");

	/* Now that the ring is fully powered up, update the tail */
	I915_WRITE_TAIL(ring, value);
	POSTING_READ(RING_TAIL(ring->mmio_base));

	/*
	 * Let the ring send IDLE messages to the GT again,
	 * and so let it sleep to conserve power when idle.
	 */
	I915_WRITE(GEN6_BSD_SLEEP_PSMI_CONTROL,
		   _MASKED_BIT_DISABLE(GEN6_BSD_SLEEP_MSG_DISABLE));
}
1499 
/*
 * Emit an MI_FLUSH_DW.  When any GPU domain is being invalidated the
 * command also requests TLB and BSD invalidation together with a
 * store-dword post-sync operation.  'flush' is not inspected.
 *
 * Returns 0 on success or the error from intel_ring_begin().
 */
static int gen6_ring_flush(struct intel_ring_buffer *ring,
			   u32 invalidate, u32 flush)
{
	uint32_t cmd = MI_FLUSH_DW;
	int ret;

	/*
	 * Bspec vol 1c.5 - video engine command streamer:
	 * "If ENABLED, all TLBs will be invalidated once the flush
	 * operation is complete. This bit is only valid when the
	 * Post-Sync Operation field is a value of 1h or 3h."
	 */
	if (invalidate & I915_GEM_GPU_DOMAINS) {
		cmd |= MI_INVALIDATE_TLB | MI_INVALIDATE_BSD |
			MI_FLUSH_DW_STORE_INDEX | MI_FLUSH_DW_OP_STOREDW;
	}

	ret = intel_ring_begin(ring, 4);
	if (ret)
		return ret;

	intel_ring_emit(ring, cmd);
	intel_ring_emit(ring, I915_GEM_HWS_SCRATCH_ADDR | MI_FLUSH_DW_USE_GTT);
	intel_ring_emit(ring, 0);
	intel_ring_emit(ring, MI_NOOP);
	intel_ring_advance(ring);

	return 0;
}
1527 
1528 static int
hsw_ring_dispatch_execbuffer(struct intel_ring_buffer * ring,u32 offset,u32 len,unsigned flags)1529 hsw_ring_dispatch_execbuffer(struct intel_ring_buffer *ring,
1530 			      u32 offset, u32 len,
1531 			      unsigned flags)
1532 {
1533 	int ret;
1534 
1535 	ret = intel_ring_begin(ring, 2);
1536 	if (ret)
1537 		return ret;
1538 
1539 	intel_ring_emit(ring,
1540 			MI_BATCH_BUFFER_START | MI_BATCH_PPGTT_HSW |
1541 			(flags & I915_DISPATCH_SECURE ? 0 : MI_BATCH_NON_SECURE_HSW));
1542 	/* bit0-7 is the length on GEN6+ */
1543 	intel_ring_emit(ring, offset);
1544 	intel_ring_advance(ring);
1545 
1546 	return 0;
1547 }
1548 
1549 static int
gen6_ring_dispatch_execbuffer(struct intel_ring_buffer * ring,u32 offset,u32 len,unsigned flags)1550 gen6_ring_dispatch_execbuffer(struct intel_ring_buffer *ring,
1551 			      u32 offset, u32 len,
1552 			      unsigned flags)
1553 {
1554 	int ret;
1555 
1556 	ret = intel_ring_begin(ring, 2);
1557 	if (ret)
1558 		return ret;
1559 
1560 	intel_ring_emit(ring,
1561 			MI_BATCH_BUFFER_START |
1562 			(flags & I915_DISPATCH_SECURE ? 0 : MI_BATCH_NON_SECURE_I965));
1563 	/* bit0-7 is the length on GEN6+ */
1564 	intel_ring_emit(ring, offset);
1565 	intel_ring_advance(ring);
1566 
1567 	return 0;
1568 }
1569 
1570 /* Blitter support (SandyBridge+) */
1571 
/*
 * Emit an MI_FLUSH_DW on the blitter ring.  When the render domain is being
 * invalidated the command also requests TLB invalidation together with a
 * store-dword post-sync operation.  'flush' is not inspected.
 *
 * Returns 0 on success or the error from intel_ring_begin().
 */
static int blt_ring_flush(struct intel_ring_buffer *ring,
			  u32 invalidate, u32 flush)
{
	uint32_t cmd = MI_FLUSH_DW;
	int ret;

	/*
	 * Bspec vol 1c.3 - blitter engine command streamer:
	 * "If ENABLED, all TLBs will be invalidated once the flush
	 * operation is complete. This bit is only valid when the
	 * Post-Sync Operation field is a value of 1h or 3h."
	 */
	if (invalidate & I915_GEM_DOMAIN_RENDER) {
		cmd |= MI_INVALIDATE_TLB | MI_FLUSH_DW_STORE_INDEX |
			MI_FLUSH_DW_OP_STOREDW;
	}

	ret = intel_ring_begin(ring, 4);
	if (ret)
		return ret;

	intel_ring_emit(ring, cmd);
	intel_ring_emit(ring, I915_GEM_HWS_SCRATCH_ADDR | MI_FLUSH_DW_USE_GTT);
	intel_ring_emit(ring, 0);
	intel_ring_emit(ring, MI_NOOP);
	intel_ring_advance(ring);

	return 0;
}
1599 
intel_init_render_ring_buffer(struct drm_device * dev)1600 int intel_init_render_ring_buffer(struct drm_device *dev)
1601 {
1602 	drm_i915_private_t *dev_priv = dev->dev_private;
1603 	struct intel_ring_buffer *ring = &dev_priv->ring[RCS];
1604 
1605 	ring->name = "render ring";
1606 	ring->id = RCS;
1607 	ring->mmio_base = RENDER_RING_BASE;
1608 
1609 	if (INTEL_INFO(dev)->gen >= 6) {
1610 		ring->add_request = gen6_add_request;
1611 		ring->flush = gen7_render_ring_flush;
1612 		if (INTEL_INFO(dev)->gen == 6)
1613 			ring->flush = gen6_render_ring_flush;
1614 		ring->irq_get = gen6_ring_get_irq;
1615 		ring->irq_put = gen6_ring_put_irq;
1616 		ring->irq_enable_mask = GT_USER_INTERRUPT;
1617 		ring->get_seqno = gen6_ring_get_seqno;
1618 		ring->sync_to = gen6_ring_sync;
1619 		ring->semaphore_register[0] = MI_SEMAPHORE_SYNC_INVALID;
1620 		ring->semaphore_register[1] = MI_SEMAPHORE_SYNC_RV;
1621 		ring->semaphore_register[2] = MI_SEMAPHORE_SYNC_RB;
1622 		ring->signal_mbox[0] = GEN6_VRSYNC;
1623 		ring->signal_mbox[1] = GEN6_BRSYNC;
1624 	} else if (IS_GEN5(dev)) {
1625 		ring->add_request = pc_render_add_request;
1626 		ring->flush = gen4_render_ring_flush;
1627 		ring->get_seqno = pc_render_get_seqno;
1628 		ring->irq_get = gen5_ring_get_irq;
1629 		ring->irq_put = gen5_ring_put_irq;
1630 		ring->irq_enable_mask = GT_USER_INTERRUPT | GT_PIPE_NOTIFY;
1631 	} else {
1632 		ring->add_request = i9xx_add_request;
1633 		if (INTEL_INFO(dev)->gen < 4)
1634 			ring->flush = gen2_render_ring_flush;
1635 		else
1636 			ring->flush = gen4_render_ring_flush;
1637 		ring->get_seqno = ring_get_seqno;
1638 		if (IS_GEN2(dev)) {
1639 			ring->irq_get = i8xx_ring_get_irq;
1640 			ring->irq_put = i8xx_ring_put_irq;
1641 		} else {
1642 			ring->irq_get = i9xx_ring_get_irq;
1643 			ring->irq_put = i9xx_ring_put_irq;
1644 		}
1645 		ring->irq_enable_mask = I915_USER_INTERRUPT;
1646 	}
1647 	ring->write_tail = ring_write_tail;
1648 	if (IS_HASWELL(dev))
1649 		ring->dispatch_execbuffer = hsw_ring_dispatch_execbuffer;
1650 	else if (INTEL_INFO(dev)->gen >= 6)
1651 		ring->dispatch_execbuffer = gen6_ring_dispatch_execbuffer;
1652 	else if (INTEL_INFO(dev)->gen >= 4)
1653 		ring->dispatch_execbuffer = i965_dispatch_execbuffer;
1654 	else if (IS_I830(dev) || IS_845G(dev))
1655 		ring->dispatch_execbuffer = i830_dispatch_execbuffer;
1656 	else
1657 		ring->dispatch_execbuffer = i915_dispatch_execbuffer;
1658 	ring->init = init_render_ring;
1659 	ring->cleanup = render_ring_cleanup;
1660 
1661 	/* Workaround batchbuffer to combat CS tlb bug. */
1662 	if (HAS_BROKEN_CS_TLB(dev)) {
1663 		struct drm_i915_gem_object *obj;
1664 		int ret;
1665 
1666 		obj = i915_gem_alloc_object(dev, I830_BATCH_LIMIT);
1667 		if (obj == NULL) {
1668 			DRM_ERROR("Failed to allocate batch bo\n");
1669 			return -ENOMEM;
1670 		}
1671 
1672 		ret = i915_gem_object_pin(obj, 0, true, false);
1673 		if (ret != 0) {
1674 			drm_gem_object_unreference(&obj->base);
1675 			DRM_ERROR("Failed to ping batch bo\n");
1676 			return ret;
1677 		}
1678 
1679 		ring->private = obj;
1680 	}
1681 
1682 	return intel_init_ring_buffer(dev, ring);
1683 }
1684 
/*
 * Set up the render ring for the legacy (non-KMS) DRI path, where the ring
 * memory at bus address 'start' with 'size' bytes is supplied by the caller
 * instead of being allocated from GEM.
 *
 * Returns 0 on success, -ENODEV on gen6+ (non-KMS is not supported there),
 * -ENOMEM if the ring cannot be mapped, or the error from
 * init_phys_hws_pga().  On failure the ring mapping created here is removed
 * again and ring->virtual_start is reset to NULL.
 */
int intel_render_ring_init_dri(struct drm_device *dev, u64 start, u32 size)
{
	drm_i915_private_t *dev_priv = dev->dev_private;
	struct intel_ring_buffer *ring = &dev_priv->ring[RCS];
	int ret;

	ring->name = "render ring";
	ring->id = RCS;
	ring->mmio_base = RENDER_RING_BASE;

	if (INTEL_INFO(dev)->gen >= 6) {
		/* non-kms not supported on gen6+ */
		return -ENODEV;
	}

	/*
	 * Note: gem is not supported on gen5/ilk without kms (the
	 * corresponding gem_init ioctl returns with -ENODEV). Hence we do
	 * not need to set up the special gen5 functions.
	 */
	ring->add_request = i9xx_add_request;
	if (INTEL_INFO(dev)->gen < 4)
		ring->flush = gen2_render_ring_flush;
	else
		ring->flush = gen4_render_ring_flush;
	ring->get_seqno = ring_get_seqno;
	if (IS_GEN2(dev)) {
		ring->irq_get = i8xx_ring_get_irq;
		ring->irq_put = i8xx_ring_put_irq;
	} else {
		ring->irq_get = i9xx_ring_get_irq;
		ring->irq_put = i9xx_ring_put_irq;
	}
	ring->irq_enable_mask = I915_USER_INTERRUPT;
	ring->write_tail = ring_write_tail;
	if (INTEL_INFO(dev)->gen >= 4)
		ring->dispatch_execbuffer = i965_dispatch_execbuffer;
	else if (IS_I830(dev) || IS_845G(dev))
		ring->dispatch_execbuffer = i830_dispatch_execbuffer;
	else
		ring->dispatch_execbuffer = i915_dispatch_execbuffer;
	ring->init = init_render_ring;
	ring->cleanup = render_ring_cleanup;

	ring->dev = dev;
	INIT_LIST_HEAD(&ring->active_list);
	INIT_LIST_HEAD(&ring->request_list);

	ring->size = size;
	/* Same i830/845G tail erratum allowance as the GEM init path. */
	ring->effective_size = ring->size;
	if (IS_I830(ring->dev) || IS_845G(ring->dev))
		ring->effective_size -= 128;

	ring->virtual_start = pmap_mapdev_attr(start, size,
	    VM_MEMATTR_WRITE_COMBINING);
	if (ring->virtual_start == NULL) {
		DRM_ERROR("can not ioremap virtual address for"
			  " ring buffer\n");
		return -ENOMEM;
	}

	if (!I915_NEED_GFX_HWS(dev)) {
		ret = init_phys_hws_pga(ring);
		if (ret) {
			/* Do not leave the ring mapping behind on failure. */
			pmap_unmapdev((vm_offset_t)ring->virtual_start, size);
			ring->virtual_start = NULL;
			return ret;
		}
	}

	return 0;
}
1752 
intel_init_bsd_ring_buffer(struct drm_device * dev)1753 int intel_init_bsd_ring_buffer(struct drm_device *dev)
1754 {
1755 	drm_i915_private_t *dev_priv = dev->dev_private;
1756 	struct intel_ring_buffer *ring = &dev_priv->ring[VCS];
1757 
1758 	ring->name = "bsd ring";
1759 	ring->id = VCS;
1760 
1761 	ring->write_tail = ring_write_tail;
1762 	if (IS_GEN6(dev) || IS_GEN7(dev)) {
1763 		ring->mmio_base = GEN6_BSD_RING_BASE;
1764 		/* gen6 bsd needs a special wa for tail updates */
1765 		if (IS_GEN6(dev))
1766 			ring->write_tail = gen6_bsd_ring_write_tail;
1767 		ring->flush = gen6_ring_flush;
1768 		ring->add_request = gen6_add_request;
1769 		ring->get_seqno = gen6_ring_get_seqno;
1770 		ring->irq_enable_mask = GEN6_BSD_USER_INTERRUPT;
1771 		ring->irq_get = gen6_ring_get_irq;
1772 		ring->irq_put = gen6_ring_put_irq;
1773 		ring->dispatch_execbuffer = gen6_ring_dispatch_execbuffer;
1774 		ring->sync_to = gen6_ring_sync;
1775 		ring->semaphore_register[0] = MI_SEMAPHORE_SYNC_VR;
1776 		ring->semaphore_register[1] = MI_SEMAPHORE_SYNC_INVALID;
1777 		ring->semaphore_register[2] = MI_SEMAPHORE_SYNC_VB;
1778 		ring->signal_mbox[0] = GEN6_RVSYNC;
1779 		ring->signal_mbox[1] = GEN6_BVSYNC;
1780 	} else {
1781 		ring->mmio_base = BSD_RING_BASE;
1782 		ring->flush = bsd_ring_flush;
1783 		ring->add_request = i9xx_add_request;
1784 		ring->get_seqno = ring_get_seqno;
1785 		if (IS_GEN5(dev)) {
1786 			ring->irq_enable_mask = GT_BSD_USER_INTERRUPT;
1787 			ring->irq_get = gen5_ring_get_irq;
1788 			ring->irq_put = gen5_ring_put_irq;
1789 		} else {
1790 			ring->irq_enable_mask = I915_BSD_USER_INTERRUPT;
1791 			ring->irq_get = i9xx_ring_get_irq;
1792 			ring->irq_put = i9xx_ring_put_irq;
1793 		}
1794 		ring->dispatch_execbuffer = i965_dispatch_execbuffer;
1795 	}
1796 	ring->init = init_ring_common;
1797 
1798 	return intel_init_ring_buffer(dev, ring);
1799 }
1800 
intel_init_blt_ring_buffer(struct drm_device * dev)1801 int intel_init_blt_ring_buffer(struct drm_device *dev)
1802 {
1803 	drm_i915_private_t *dev_priv = dev->dev_private;
1804 	struct intel_ring_buffer *ring = &dev_priv->ring[BCS];
1805 
1806 	ring->name = "blitter ring";
1807 	ring->id = BCS;
1808 
1809 	ring->mmio_base = BLT_RING_BASE;
1810 	ring->write_tail = ring_write_tail;
1811 	ring->flush = blt_ring_flush;
1812 	ring->add_request = gen6_add_request;
1813 	ring->get_seqno = gen6_ring_get_seqno;
1814 	ring->irq_enable_mask = GEN6_BLITTER_USER_INTERRUPT;
1815 	ring->irq_get = gen6_ring_get_irq;
1816 	ring->irq_put = gen6_ring_put_irq;
1817 	ring->dispatch_execbuffer = gen6_ring_dispatch_execbuffer;
1818 	ring->sync_to = gen6_ring_sync;
1819 	ring->semaphore_register[0] = MI_SEMAPHORE_SYNC_BR;
1820 	ring->semaphore_register[1] = MI_SEMAPHORE_SYNC_BV;
1821 	ring->semaphore_register[2] = MI_SEMAPHORE_SYNC_INVALID;
1822 	ring->signal_mbox[0] = GEN6_RBSYNC;
1823 	ring->signal_mbox[1] = GEN6_VBSYNC;
1824 	ring->init = init_ring_common;
1825 
1826 	return intel_init_ring_buffer(dev, ring);
1827 }
1828 
1829 int
intel_ring_flush_all_caches(struct intel_ring_buffer * ring)1830 intel_ring_flush_all_caches(struct intel_ring_buffer *ring)
1831 {
1832 	int ret;
1833 
1834 	if (!ring->gpu_caches_dirty)
1835 		return 0;
1836 
1837 	ret = ring->flush(ring, 0, I915_GEM_GPU_DOMAINS);
1838 	if (ret)
1839 		return ret;
1840 
1841 	ring->gpu_caches_dirty = false;
1842 	return 0;
1843 }
1844 
1845 int
intel_ring_invalidate_all_caches(struct intel_ring_buffer * ring)1846 intel_ring_invalidate_all_caches(struct intel_ring_buffer *ring)
1847 {
1848 	uint32_t flush_domains;
1849 	int ret;
1850 
1851 	flush_domains = 0;
1852 	if (ring->gpu_caches_dirty)
1853 		flush_domains = I915_GEM_GPU_DOMAINS;
1854 
1855 	ret = ring->flush(ring, I915_GEM_GPU_DOMAINS, flush_domains);
1856 	if (ret)
1857 		return ret;
1858 
1859 	ring->gpu_caches_dirty = false;
1860 	return 0;
1861 }
1862