1 /*
2  * Copyright © 2008-2010 Intel Corporation
3  *
4  * Permission is hereby granted, free of charge, to any person obtaining a
5  * copy of this software and associated documentation files (the "Software"),
6  * to deal in the Software without restriction, including without limitation
7  * the rights to use, copy, modify, merge, publish, distribute, sublicense,
8  * and/or sell copies of the Software, and to permit persons to whom the
9  * Software is furnished to do so, subject to the following conditions:
10  *
11  * The above copyright notice and this permission notice (including the next
12  * paragraph) shall be included in all copies or substantial portions of the
13  * Software.
14  *
15  * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
16  * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
17  * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT.  IN NO EVENT SHALL
18  * THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
19  * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING
20  * FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS
21  * IN THE SOFTWARE.
22  *
23  * Authors:
24  *    Eric Anholt <eric@anholt.net>
25  *    Zou Nan hai <nanhai.zou@intel.com>
26  *    Xiang Hai hao<haihao.xiang@intel.com>
27  *
28  */
29 
30 #include <sys/cdefs.h>
31 __FBSDID("$FreeBSD: stable/9/sys/dev/drm2/i915/intel_ringbuffer.c 254023 2013-08-07 05:55:02Z kib $");
32 
33 #include <dev/drm2/drmP.h>
34 #include <dev/drm2/drm.h>
35 #include <dev/drm2/i915/i915_drm.h>
36 #include <dev/drm2/i915/i915_drv.h>
37 #include <dev/drm2/i915/intel_drv.h>
38 #include <dev/drm2/i915/intel_ringbuffer.h>
39 #include <sys/sched.h>
40 #include <sys/sf_buf.h>
41 
42 /*
43  * 965+ support PIPE_CONTROL commands, which provide finer grained control
44  * over cache flushing.
45  */
struct pipe_control {
	/* GEM object backing the PIPE_CONTROL scratch/seqno page. */
	struct drm_i915_gem_object *obj;
	/* Kernel virtual mapping of the object's first page. */
	volatile u32 *cpu_page;
	/* Offset of the object in the GTT, as seen by the GPU. */
	u32 gtt_offset;
};
51 
52 void
i915_trace_irq_get(struct intel_ring_buffer * ring,uint32_t seqno)53 i915_trace_irq_get(struct intel_ring_buffer *ring, uint32_t seqno)
54 {
55 
56 	if (ring->trace_irq_seqno == 0) {
57 		mtx_lock(&ring->irq_lock);
58 		if (ring->irq_get(ring))
59 			ring->trace_irq_seqno = seqno;
60 		mtx_unlock(&ring->irq_lock);
61 	}
62 }
63 
ring_space(struct intel_ring_buffer * ring)64 static inline int ring_space(struct intel_ring_buffer *ring)
65 {
66 	int space = (ring->head & HEAD_ADDR) - (ring->tail + 8);
67 	if (space < 0)
68 		space += ring->size;
69 	return space;
70 }
71 
72 static int
render_ring_flush(struct intel_ring_buffer * ring,uint32_t invalidate_domains,uint32_t flush_domains)73 render_ring_flush(struct intel_ring_buffer *ring,
74 		  uint32_t	invalidate_domains,
75 		  uint32_t	flush_domains)
76 {
77 	struct drm_device *dev = ring->dev;
78 	uint32_t cmd;
79 	int ret;
80 
81 	/*
82 	 * read/write caches:
83 	 *
84 	 * I915_GEM_DOMAIN_RENDER is always invalidated, but is
85 	 * only flushed if MI_NO_WRITE_FLUSH is unset.  On 965, it is
86 	 * also flushed at 2d versus 3d pipeline switches.
87 	 *
88 	 * read-only caches:
89 	 *
90 	 * I915_GEM_DOMAIN_SAMPLER is flushed on pre-965 if
91 	 * MI_READ_FLUSH is set, and is always flushed on 965.
92 	 *
93 	 * I915_GEM_DOMAIN_COMMAND may not exist?
94 	 *
95 	 * I915_GEM_DOMAIN_INSTRUCTION, which exists on 965, is
96 	 * invalidated when MI_EXE_FLUSH is set.
97 	 *
98 	 * I915_GEM_DOMAIN_VERTEX, which exists on 965, is
99 	 * invalidated with every MI_FLUSH.
100 	 *
101 	 * TLBs:
102 	 *
103 	 * On 965, TLBs associated with I915_GEM_DOMAIN_COMMAND
104 	 * and I915_GEM_DOMAIN_CPU in are invalidated at PTE write and
105 	 * I915_GEM_DOMAIN_RENDER and I915_GEM_DOMAIN_SAMPLER
106 	 * are flushed at any MI_FLUSH.
107 	 */
108 
109 	cmd = MI_FLUSH | MI_NO_WRITE_FLUSH;
110 	if ((invalidate_domains|flush_domains) &
111 	    I915_GEM_DOMAIN_RENDER)
112 		cmd &= ~MI_NO_WRITE_FLUSH;
113 	if (INTEL_INFO(dev)->gen < 4) {
114 		/*
115 		 * On the 965, the sampler cache always gets flushed
116 		 * and this bit is reserved.
117 		 */
118 		if (invalidate_domains & I915_GEM_DOMAIN_SAMPLER)
119 			cmd |= MI_READ_FLUSH;
120 	}
121 	if (invalidate_domains & I915_GEM_DOMAIN_INSTRUCTION)
122 		cmd |= MI_EXE_FLUSH;
123 
124 	if (invalidate_domains & I915_GEM_DOMAIN_COMMAND &&
125 	    (IS_G4X(dev) || IS_GEN5(dev)))
126 		cmd |= MI_INVALIDATE_ISP;
127 
128 	ret = intel_ring_begin(ring, 2);
129 	if (ret)
130 		return ret;
131 
132 	intel_ring_emit(ring, cmd);
133 	intel_ring_emit(ring, MI_NOOP);
134 	intel_ring_advance(ring);
135 
136 	return 0;
137 }
138 
139 /**
140  * Emits a PIPE_CONTROL with a non-zero post-sync operation, for
141  * implementing two workarounds on gen6.  From section 1.4.7.1
142  * "PIPE_CONTROL" of the Sandy Bridge PRM volume 2 part 1:
143  *
144  * [DevSNB-C+{W/A}] Before any depth stall flush (including those
145  * produced by non-pipelined state commands), software needs to first
146  * send a PIPE_CONTROL with no bits set except Post-Sync Operation !=
147  * 0.
148  *
149  * [Dev-SNB{W/A}]: Before a PIPE_CONTROL with Write Cache Flush Enable
150  * =1, a PIPE_CONTROL with any non-zero post-sync-op is required.
151  *
152  * And the workaround for these two requires this workaround first:
153  *
154  * [Dev-SNB{W/A}]: Pipe-control with CS-stall bit set must be sent
155  * BEFORE the pipe-control with a post-sync op and no write-cache
156  * flushes.
157  *
158  * And this last workaround is tricky because of the requirements on
159  * that bit.  From section 1.4.7.2.3 "Stall" of the Sandy Bridge PRM
160  * volume 2 part 1:
161  *
162  *     "1 of the following must also be set:
163  *      - Render Target Cache Flush Enable ([12] of DW1)
164  *      - Depth Cache Flush Enable ([0] of DW1)
165  *      - Stall at Pixel Scoreboard ([1] of DW1)
166  *      - Depth Stall ([13] of DW1)
167  *      - Post-Sync Operation ([13] of DW1)
168  *      - Notify Enable ([8] of DW1)"
169  *
170  * The cache flushes require the workaround flush that triggered this
171  * one, so we can't use it.  Depth stall would trigger the same.
172  * Post-sync nonzero is what triggered this second workaround, so we
173  * can't use that one either.  Notify enable is IRQs, which aren't
174  * really our business.  That leaves only stall at scoreboard.
175  */
176 static int
intel_emit_post_sync_nonzero_flush(struct intel_ring_buffer * ring)177 intel_emit_post_sync_nonzero_flush(struct intel_ring_buffer *ring)
178 {
179 	struct pipe_control *pc = ring->private;
180 	u32 scratch_addr = pc->gtt_offset + 128;
181 	int ret;
182 
183 
184 	ret = intel_ring_begin(ring, 6);
185 	if (ret)
186 		return ret;
187 
188 	intel_ring_emit(ring, GFX_OP_PIPE_CONTROL(5));
189 	intel_ring_emit(ring, PIPE_CONTROL_CS_STALL |
190 			PIPE_CONTROL_STALL_AT_SCOREBOARD);
191 	intel_ring_emit(ring, scratch_addr | PIPE_CONTROL_GLOBAL_GTT); /* address */
192 	intel_ring_emit(ring, 0); /* low dword */
193 	intel_ring_emit(ring, 0); /* high dword */
194 	intel_ring_emit(ring, MI_NOOP);
195 	intel_ring_advance(ring);
196 
197 	ret = intel_ring_begin(ring, 6);
198 	if (ret)
199 		return ret;
200 
201 	intel_ring_emit(ring, GFX_OP_PIPE_CONTROL(5));
202 	intel_ring_emit(ring, PIPE_CONTROL_QW_WRITE);
203 	intel_ring_emit(ring, scratch_addr | PIPE_CONTROL_GLOBAL_GTT); /* address */
204 	intel_ring_emit(ring, 0);
205 	intel_ring_emit(ring, 0);
206 	intel_ring_emit(ring, MI_NOOP);
207 	intel_ring_advance(ring);
208 
209 	return 0;
210 }
211 
212 static int
gen6_render_ring_flush(struct intel_ring_buffer * ring,u32 invalidate_domains,u32 flush_domains)213 gen6_render_ring_flush(struct intel_ring_buffer *ring,
214                          u32 invalidate_domains, u32 flush_domains)
215 {
216 	u32 flags = 0;
217 	struct pipe_control *pc = ring->private;
218 	u32 scratch_addr = pc->gtt_offset + 128;
219 	int ret;
220 
221 	/* Force SNB workarounds for PIPE_CONTROL flushes */
222 	intel_emit_post_sync_nonzero_flush(ring);
223 
224 	/* Just flush everything.  Experiments have shown that reducing the
225 	 * number of bits based on the write domains has little performance
226 	 * impact.
227 	 */
228 	flags |= PIPE_CONTROL_RENDER_TARGET_CACHE_FLUSH;
229 	flags |= PIPE_CONTROL_INSTRUCTION_CACHE_INVALIDATE;
230 	flags |= PIPE_CONTROL_TEXTURE_CACHE_INVALIDATE;
231 	flags |= PIPE_CONTROL_DEPTH_CACHE_FLUSH;
232 	flags |= PIPE_CONTROL_VF_CACHE_INVALIDATE;
233 	flags |= PIPE_CONTROL_CONST_CACHE_INVALIDATE;
234 	flags |= PIPE_CONTROL_STATE_CACHE_INVALIDATE;
235 
236 	ret = intel_ring_begin(ring, 6);
237 	if (ret)
238 		return ret;
239 
240 	intel_ring_emit(ring, GFX_OP_PIPE_CONTROL(5));
241 	intel_ring_emit(ring, flags);
242 	intel_ring_emit(ring, scratch_addr | PIPE_CONTROL_GLOBAL_GTT);
243 	intel_ring_emit(ring, 0); /* lower dword */
244 	intel_ring_emit(ring, 0); /* uppwer dword */
245 	intel_ring_emit(ring, MI_NOOP);
246 	intel_ring_advance(ring);
247 
248 	return 0;
249 }
250 
/*
 * Write value to the ring's tail register.
 */
static void
ring_write_tail(struct intel_ring_buffer *ring, uint32_t value)
{
	/* dev_priv is referenced implicitly by the register macro. */
	drm_i915_private_t *dev_priv = ring->dev->dev_private;

	I915_WRITE_TAIL(ring, value);
}
257 
intel_ring_get_active_head(struct intel_ring_buffer * ring)258 u32 intel_ring_get_active_head(struct intel_ring_buffer *ring)
259 {
260 	drm_i915_private_t *dev_priv = ring->dev->dev_private;
261 	uint32_t acthd_reg = INTEL_INFO(ring->dev)->gen >= 4 ?
262 			RING_ACTHD(ring->mmio_base) : ACTHD;
263 
264 	return I915_READ(acthd_reg);
265 }
266 
init_ring_common(struct intel_ring_buffer * ring)267 static int init_ring_common(struct intel_ring_buffer *ring)
268 {
269 	drm_i915_private_t *dev_priv = ring->dev->dev_private;
270 	struct drm_i915_gem_object *obj = ring->obj;
271 	uint32_t head;
272 
273 	/* Stop the ring if it's running. */
274 	I915_WRITE_CTL(ring, 0);
275 	I915_WRITE_HEAD(ring, 0);
276 	ring->write_tail(ring, 0);
277 
278 	/* Initialize the ring. */
279 	I915_WRITE_START(ring, obj->gtt_offset);
280 	head = I915_READ_HEAD(ring) & HEAD_ADDR;
281 
282 	/* G45 ring initialization fails to reset head to zero */
283 	if (head != 0) {
284 		DRM_DEBUG("%s head not reset to zero "
285 			      "ctl %08x head %08x tail %08x start %08x\n",
286 			      ring->name,
287 			      I915_READ_CTL(ring),
288 			      I915_READ_HEAD(ring),
289 			      I915_READ_TAIL(ring),
290 			      I915_READ_START(ring));
291 
292 		I915_WRITE_HEAD(ring, 0);
293 
294 		if (I915_READ_HEAD(ring) & HEAD_ADDR) {
295 			DRM_ERROR("failed to set %s head to zero "
296 				  "ctl %08x head %08x tail %08x start %08x\n",
297 				  ring->name,
298 				  I915_READ_CTL(ring),
299 				  I915_READ_HEAD(ring),
300 				  I915_READ_TAIL(ring),
301 				  I915_READ_START(ring));
302 		}
303 	}
304 
305 	I915_WRITE_CTL(ring,
306 			((ring->size - PAGE_SIZE) & RING_NR_PAGES)
307 			| RING_VALID);
308 
309 	/* If the head is still not zero, the ring is dead */
310 	if (_intel_wait_for(ring->dev,
311 	    (I915_READ_CTL(ring) & RING_VALID) != 0 &&
312 	     I915_READ_START(ring) == obj->gtt_offset &&
313 	     (I915_READ_HEAD(ring) & HEAD_ADDR) == 0,
314 	    50, 1, "915rii")) {
315 		DRM_ERROR("%s initialization failed "
316 				"ctl %08x head %08x tail %08x start %08x\n",
317 				ring->name,
318 				I915_READ_CTL(ring),
319 				I915_READ_HEAD(ring),
320 				I915_READ_TAIL(ring),
321 				I915_READ_START(ring));
322 		return -EIO;
323 	}
324 
325 	if (!drm_core_check_feature(ring->dev, DRIVER_MODESET))
326 		i915_kernel_lost_context(ring->dev);
327 	else {
328 		ring->head = I915_READ_HEAD(ring);
329 		ring->tail = I915_READ_TAIL(ring) & TAIL_ADDR;
330 		ring->space = ring_space(ring);
331 	}
332 
333 	return 0;
334 }
335 
336 static int
init_pipe_control(struct intel_ring_buffer * ring)337 init_pipe_control(struct intel_ring_buffer *ring)
338 {
339 	struct pipe_control *pc;
340 	struct drm_i915_gem_object *obj;
341 	int ret;
342 
343 	if (ring->private)
344 		return 0;
345 
346 	pc = malloc(sizeof(*pc), DRM_I915_GEM, M_WAITOK);
347 	if (!pc)
348 		return -ENOMEM;
349 
350 	obj = i915_gem_alloc_object(ring->dev, 4096);
351 	if (obj == NULL) {
352 		DRM_ERROR("Failed to allocate seqno page\n");
353 		ret = -ENOMEM;
354 		goto err;
355 	}
356 
357 	i915_gem_object_set_cache_level(obj, I915_CACHE_LLC);
358 
359 	ret = i915_gem_object_pin(obj, 4096, true);
360 	if (ret)
361 		goto err_unref;
362 
363 	pc->gtt_offset = obj->gtt_offset;
364 	pc->cpu_page = (uint32_t *)kmem_alloc_nofault(kernel_map, PAGE_SIZE);
365 	if (pc->cpu_page == NULL)
366 		goto err_unpin;
367 	pmap_qenter((uintptr_t)pc->cpu_page, &obj->pages[0], 1);
368 	pmap_invalidate_cache_range((vm_offset_t)pc->cpu_page,
369 	    (vm_offset_t)pc->cpu_page + PAGE_SIZE);
370 
371 	pc->obj = obj;
372 	ring->private = pc;
373 	return 0;
374 
375 err_unpin:
376 	i915_gem_object_unpin(obj);
377 err_unref:
378 	drm_gem_object_unreference(&obj->base);
379 err:
380 	free(pc, DRM_I915_GEM);
381 	return ret;
382 }
383 
384 static void
cleanup_pipe_control(struct intel_ring_buffer * ring)385 cleanup_pipe_control(struct intel_ring_buffer *ring)
386 {
387 	struct pipe_control *pc = ring->private;
388 	struct drm_i915_gem_object *obj;
389 
390 	if (!ring->private)
391 		return;
392 
393 	obj = pc->obj;
394 	pmap_qremove((vm_offset_t)pc->cpu_page, 1);
395 	kmem_free(kernel_map, (uintptr_t)pc->cpu_page, PAGE_SIZE);
396 	i915_gem_object_unpin(obj);
397 	drm_gem_object_unreference(&obj->base);
398 
399 	free(pc, DRM_I915_GEM);
400 	ring->private = NULL;
401 }
402 
init_render_ring(struct intel_ring_buffer * ring)403 static int init_render_ring(struct intel_ring_buffer *ring)
404 {
405 	struct drm_device *dev = ring->dev;
406 	struct drm_i915_private *dev_priv = dev->dev_private;
407 	int ret = init_ring_common(ring);
408 
409 	if (INTEL_INFO(dev)->gen > 3) {
410 		int mode = VS_TIMER_DISPATCH << 16 | VS_TIMER_DISPATCH;
411 		I915_WRITE(MI_MODE, mode);
412 		if (IS_GEN7(dev))
413 			I915_WRITE(GFX_MODE_GEN7,
414 				   GFX_MODE_DISABLE(GFX_TLB_INVALIDATE_ALWAYS) |
415 				   GFX_MODE_ENABLE(GFX_REPLAY_MODE));
416 	}
417 
418 	if (INTEL_INFO(dev)->gen >= 5) {
419 		ret = init_pipe_control(ring);
420 		if (ret)
421 			return ret;
422 	}
423 
424 
425 	if (IS_GEN6(dev)) {
426 		/* From the Sandybridge PRM, volume 1 part 3, page 24:
427 		 * "If this bit is set, STCunit will have LRA as replacement
428 		 *  policy. [...] This bit must be reset.  LRA replacement
429 		 *  policy is not supported."
430 		 */
431 		I915_WRITE(CACHE_MODE_0,
432 			   CM0_STC_EVICT_DISABLE_LRA_SNB << CM0_MASK_SHIFT);
433 	}
434 
435 	if (INTEL_INFO(dev)->gen >= 6) {
436 		I915_WRITE(INSTPM,
437 			   INSTPM_FORCE_ORDERING << 16 | INSTPM_FORCE_ORDERING);
438 	}
439 
440 	return ret;
441 }
442 
render_ring_cleanup(struct intel_ring_buffer * ring)443 static void render_ring_cleanup(struct intel_ring_buffer *ring)
444 {
445 	if (!ring->private)
446 		return;
447 
448 	cleanup_pipe_control(ring);
449 }
450 
451 static void
update_mboxes(struct intel_ring_buffer * ring,u32 seqno,u32 mmio_offset)452 update_mboxes(struct intel_ring_buffer *ring,
453 	    u32 seqno,
454 	    u32 mmio_offset)
455 {
456 	intel_ring_emit(ring, MI_SEMAPHORE_MBOX |
457 			      MI_SEMAPHORE_GLOBAL_GTT |
458 			      MI_SEMAPHORE_REGISTER |
459 			      MI_SEMAPHORE_UPDATE);
460 	intel_ring_emit(ring, seqno);
461 	intel_ring_emit(ring, mmio_offset);
462 }
463 
464 /**
465  * gen6_add_request - Update the semaphore mailbox registers
466  *
467  * @ring - ring that is adding a request
468  * @seqno - return seqno stuck into the ring
469  *
470  * Update the mailbox registers in the *other* rings with the current seqno.
471  * This acts like a signal in the canonical semaphore.
472  */
473 static int
gen6_add_request(struct intel_ring_buffer * ring,u32 * seqno)474 gen6_add_request(struct intel_ring_buffer *ring,
475 		 u32 *seqno)
476 {
477 	u32 mbox1_reg;
478 	u32 mbox2_reg;
479 	int ret;
480 
481 	ret = intel_ring_begin(ring, 10);
482 	if (ret)
483 		return ret;
484 
485 	mbox1_reg = ring->signal_mbox[0];
486 	mbox2_reg = ring->signal_mbox[1];
487 
488 	*seqno = i915_gem_next_request_seqno(ring);
489 
490 	update_mboxes(ring, *seqno, mbox1_reg);
491 	update_mboxes(ring, *seqno, mbox2_reg);
492 	intel_ring_emit(ring, MI_STORE_DWORD_INDEX);
493 	intel_ring_emit(ring, I915_GEM_HWS_INDEX << MI_STORE_DWORD_INDEX_SHIFT);
494 	intel_ring_emit(ring, *seqno);
495 	intel_ring_emit(ring, MI_USER_INTERRUPT);
496 	intel_ring_advance(ring);
497 
498 	return 0;
499 }
500 
501 /**
502  * intel_ring_sync - sync the waiter to the signaller on seqno
503  *
504  * @waiter - ring that is waiting
505  * @signaller - ring which has, or will signal
506  * @seqno - seqno which the waiter will block on
507  */
508 static int
intel_ring_sync(struct intel_ring_buffer * waiter,struct intel_ring_buffer * signaller,int ring,u32 seqno)509 intel_ring_sync(struct intel_ring_buffer *waiter,
510 		struct intel_ring_buffer *signaller,
511 		int ring,
512 		u32 seqno)
513 {
514 	int ret;
515 	u32 dw1 = MI_SEMAPHORE_MBOX |
516 		  MI_SEMAPHORE_COMPARE |
517 		  MI_SEMAPHORE_REGISTER;
518 
519 	ret = intel_ring_begin(waiter, 4);
520 	if (ret)
521 		return ret;
522 
523 	intel_ring_emit(waiter, dw1 | signaller->semaphore_register[ring]);
524 	intel_ring_emit(waiter, seqno);
525 	intel_ring_emit(waiter, 0);
526 	intel_ring_emit(waiter, MI_NOOP);
527 	intel_ring_advance(waiter);
528 
529 	return 0;
530 }
531 
532 int render_ring_sync_to(struct intel_ring_buffer *waiter,
533     struct intel_ring_buffer *signaller, u32 seqno);
534 int gen6_bsd_ring_sync_to(struct intel_ring_buffer *waiter,
535     struct intel_ring_buffer *signaller, u32 seqno);
536 int gen6_blt_ring_sync_to(struct intel_ring_buffer *waiter,
537     struct intel_ring_buffer *signaller, u32 seqno);
538 
539 /* VCS->RCS (RVSYNC) or BCS->RCS (RBSYNC) */
540 int
render_ring_sync_to(struct intel_ring_buffer * waiter,struct intel_ring_buffer * signaller,u32 seqno)541 render_ring_sync_to(struct intel_ring_buffer *waiter,
542 		    struct intel_ring_buffer *signaller,
543 		    u32 seqno)
544 {
545 	KASSERT(signaller->semaphore_register[RCS] != MI_SEMAPHORE_SYNC_INVALID,
546 	    ("valid RCS semaphore"));
547 	return intel_ring_sync(waiter,
548 			       signaller,
549 			       RCS,
550 			       seqno);
551 }
552 
553 /* RCS->VCS (VRSYNC) or BCS->VCS (VBSYNC) */
554 int
gen6_bsd_ring_sync_to(struct intel_ring_buffer * waiter,struct intel_ring_buffer * signaller,u32 seqno)555 gen6_bsd_ring_sync_to(struct intel_ring_buffer *waiter,
556 		      struct intel_ring_buffer *signaller,
557 		      u32 seqno)
558 {
559 	KASSERT(signaller->semaphore_register[VCS] != MI_SEMAPHORE_SYNC_INVALID,
560 	    ("Valid VCS semaphore"));
561 	return intel_ring_sync(waiter,
562 			       signaller,
563 			       VCS,
564 			       seqno);
565 }
566 
567 /* RCS->BCS (BRSYNC) or VCS->BCS (BVSYNC) */
568 int
gen6_blt_ring_sync_to(struct intel_ring_buffer * waiter,struct intel_ring_buffer * signaller,u32 seqno)569 gen6_blt_ring_sync_to(struct intel_ring_buffer *waiter,
570 		      struct intel_ring_buffer *signaller,
571 		      u32 seqno)
572 {
573 	KASSERT(signaller->semaphore_register[BCS] != MI_SEMAPHORE_SYNC_INVALID,
574 	    ("Valid BCS semaphore"));
575 	return intel_ring_sync(waiter,
576 			       signaller,
577 			       BCS,
578 			       seqno);
579 }
580 
/*
 * Emit one four-dword PIPE_CONTROL on ring__ with the qword-write and
 * depth-stall bits set, targeting global GTT address addr__, followed
 * by two zero data dwords.  The caller reserves the ring space.
 */
#define PIPE_CONTROL_FLUSH(ring__, addr__)				\
do {									\
	intel_ring_emit(ring__, GFX_OP_PIPE_CONTROL(4) |		\
	    PIPE_CONTROL_QW_WRITE |					\
	    PIPE_CONTROL_DEPTH_STALL);					\
	intel_ring_emit(ring__, (addr__) | PIPE_CONTROL_GLOBAL_GTT);	\
	intel_ring_emit(ring__, 0);					\
	intel_ring_emit(ring__, 0);					\
} while (0)
589 
590 static int
pc_render_add_request(struct intel_ring_buffer * ring,uint32_t * result)591 pc_render_add_request(struct intel_ring_buffer *ring,
592 		      uint32_t *result)
593 {
594 	u32 seqno = i915_gem_next_request_seqno(ring);
595 	struct pipe_control *pc = ring->private;
596 	u32 scratch_addr = pc->gtt_offset + 128;
597 	int ret;
598 
599 	/* For Ironlake, MI_USER_INTERRUPT was deprecated and apparently
600 	 * incoherent with writes to memory, i.e. completely fubar,
601 	 * so we need to use PIPE_NOTIFY instead.
602 	 *
603 	 * However, we also need to workaround the qword write
604 	 * incoherence by flushing the 6 PIPE_NOTIFY buffers out to
605 	 * memory before requesting an interrupt.
606 	 */
607 	ret = intel_ring_begin(ring, 32);
608 	if (ret)
609 		return ret;
610 
611 	intel_ring_emit(ring, GFX_OP_PIPE_CONTROL(4) | PIPE_CONTROL_QW_WRITE |
612 			PIPE_CONTROL_WRITE_FLUSH |
613 			PIPE_CONTROL_TEXTURE_CACHE_INVALIDATE);
614 	intel_ring_emit(ring, pc->gtt_offset | PIPE_CONTROL_GLOBAL_GTT);
615 	intel_ring_emit(ring, seqno);
616 	intel_ring_emit(ring, 0);
617 	PIPE_CONTROL_FLUSH(ring, scratch_addr);
618 	scratch_addr += 128; /* write to separate cachelines */
619 	PIPE_CONTROL_FLUSH(ring, scratch_addr);
620 	scratch_addr += 128;
621 	PIPE_CONTROL_FLUSH(ring, scratch_addr);
622 	scratch_addr += 128;
623 	PIPE_CONTROL_FLUSH(ring, scratch_addr);
624 	scratch_addr += 128;
625 	PIPE_CONTROL_FLUSH(ring, scratch_addr);
626 	scratch_addr += 128;
627 	PIPE_CONTROL_FLUSH(ring, scratch_addr);
628 	intel_ring_emit(ring, GFX_OP_PIPE_CONTROL(4) | PIPE_CONTROL_QW_WRITE |
629 			PIPE_CONTROL_WRITE_FLUSH |
630 			PIPE_CONTROL_TEXTURE_CACHE_INVALIDATE |
631 			PIPE_CONTROL_NOTIFY);
632 	intel_ring_emit(ring, pc->gtt_offset | PIPE_CONTROL_GLOBAL_GTT);
633 	intel_ring_emit(ring, seqno);
634 	intel_ring_emit(ring, 0);
635 	intel_ring_advance(ring);
636 
637 	*result = seqno;
638 	return 0;
639 }
640 
641 static int
render_ring_add_request(struct intel_ring_buffer * ring,uint32_t * result)642 render_ring_add_request(struct intel_ring_buffer *ring,
643 			uint32_t *result)
644 {
645 	u32 seqno = i915_gem_next_request_seqno(ring);
646 	int ret;
647 
648 	ret = intel_ring_begin(ring, 4);
649 	if (ret)
650 		return ret;
651 
652 	intel_ring_emit(ring, MI_STORE_DWORD_INDEX);
653 	intel_ring_emit(ring, I915_GEM_HWS_INDEX << MI_STORE_DWORD_INDEX_SHIFT);
654 	intel_ring_emit(ring, seqno);
655 	intel_ring_emit(ring, MI_USER_INTERRUPT);
656 	intel_ring_advance(ring);
657 
658 	*result = seqno;
659 	return 0;
660 }
661 
662  static u32
gen6_ring_get_seqno(struct intel_ring_buffer * ring)663 gen6_ring_get_seqno(struct intel_ring_buffer *ring)
664 {
665 	struct drm_device *dev = ring->dev;
666 
667 	/* Workaround to force correct ordering between irq and seqno writes on
668 	 * ivb (and maybe also on snb) by reading from a CS register (like
669 	 * ACTHD) before reading the status page. */
670 	if (/* IS_GEN6(dev) || */IS_GEN7(dev))
671 		intel_ring_get_active_head(ring);
672 	return intel_read_status_page(ring, I915_GEM_HWS_INDEX);
673 }
674 
675 static uint32_t
ring_get_seqno(struct intel_ring_buffer * ring)676 ring_get_seqno(struct intel_ring_buffer *ring)
677 {
678 	if (ring->status_page.page_addr == NULL)
679 		return (-1);
680 	return intel_read_status_page(ring, I915_GEM_HWS_INDEX);
681 }
682 
683 static uint32_t
pc_render_get_seqno(struct intel_ring_buffer * ring)684 pc_render_get_seqno(struct intel_ring_buffer *ring)
685 {
686 	struct pipe_control *pc = ring->private;
687 	if (pc != NULL)
688 		return pc->cpu_page[0];
689 	else
690 		return (-1);
691 }
692 
693 static void
ironlake_enable_irq(drm_i915_private_t * dev_priv,uint32_t mask)694 ironlake_enable_irq(drm_i915_private_t *dev_priv, uint32_t mask)
695 {
696 	dev_priv->gt_irq_mask &= ~mask;
697 	I915_WRITE(GTIMR, dev_priv->gt_irq_mask);
698 	POSTING_READ(GTIMR);
699 }
700 
701 static void
ironlake_disable_irq(drm_i915_private_t * dev_priv,uint32_t mask)702 ironlake_disable_irq(drm_i915_private_t *dev_priv, uint32_t mask)
703 {
704 	dev_priv->gt_irq_mask |= mask;
705 	I915_WRITE(GTIMR, dev_priv->gt_irq_mask);
706 	POSTING_READ(GTIMR);
707 }
708 
709 static void
i915_enable_irq(drm_i915_private_t * dev_priv,uint32_t mask)710 i915_enable_irq(drm_i915_private_t *dev_priv, uint32_t mask)
711 {
712 	dev_priv->irq_mask &= ~mask;
713 	I915_WRITE(IMR, dev_priv->irq_mask);
714 	POSTING_READ(IMR);
715 }
716 
717 static void
i915_disable_irq(drm_i915_private_t * dev_priv,uint32_t mask)718 i915_disable_irq(drm_i915_private_t *dev_priv, uint32_t mask)
719 {
720 	dev_priv->irq_mask |= mask;
721 	I915_WRITE(IMR, dev_priv->irq_mask);
722 	POSTING_READ(IMR);
723 }
724 
725 static bool
render_ring_get_irq(struct intel_ring_buffer * ring)726 render_ring_get_irq(struct intel_ring_buffer *ring)
727 {
728 	struct drm_device *dev = ring->dev;
729 	drm_i915_private_t *dev_priv = dev->dev_private;
730 
731 	if (!dev->irq_enabled)
732 		return false;
733 
734 	mtx_assert(&ring->irq_lock, MA_OWNED);
735 	if (ring->irq_refcount++ == 0) {
736 		if (HAS_PCH_SPLIT(dev))
737 			ironlake_enable_irq(dev_priv,
738 					    GT_PIPE_NOTIFY | GT_USER_INTERRUPT);
739 		else
740 			i915_enable_irq(dev_priv, I915_USER_INTERRUPT);
741 	}
742 
743 	return true;
744 }
745 
746 static void
render_ring_put_irq(struct intel_ring_buffer * ring)747 render_ring_put_irq(struct intel_ring_buffer *ring)
748 {
749 	struct drm_device *dev = ring->dev;
750 	drm_i915_private_t *dev_priv = dev->dev_private;
751 
752 	mtx_assert(&ring->irq_lock, MA_OWNED);
753 	if (--ring->irq_refcount == 0) {
754 		if (HAS_PCH_SPLIT(dev))
755 			ironlake_disable_irq(dev_priv,
756 					     GT_USER_INTERRUPT |
757 					     GT_PIPE_NOTIFY);
758 		else
759 			i915_disable_irq(dev_priv, I915_USER_INTERRUPT);
760 	}
761 }
762 
intel_ring_setup_status_page(struct intel_ring_buffer * ring)763 void intel_ring_setup_status_page(struct intel_ring_buffer *ring)
764 {
765 	struct drm_device *dev = ring->dev;
766 	drm_i915_private_t *dev_priv = dev->dev_private;
767 	uint32_t mmio = 0;
768 
769 	/* The ring status page addresses are no longer next to the rest of
770 	 * the ring registers as of gen7.
771 	 */
772 	if (IS_GEN7(dev)) {
773 		switch (ring->id) {
774 		case RCS:
775 			mmio = RENDER_HWS_PGA_GEN7;
776 			break;
777 		case BCS:
778 			mmio = BLT_HWS_PGA_GEN7;
779 			break;
780 		case VCS:
781 			mmio = BSD_HWS_PGA_GEN7;
782 			break;
783 		}
784 	} else if (IS_GEN6(dev)) {
785 		mmio = RING_HWS_PGA_GEN6(ring->mmio_base);
786 	} else {
787 		mmio = RING_HWS_PGA(ring->mmio_base);
788 	}
789 
790 	I915_WRITE(mmio, (u32)ring->status_page.gfx_addr);
791 	POSTING_READ(mmio);
792 }
793 
794 static int
bsd_ring_flush(struct intel_ring_buffer * ring,uint32_t invalidate_domains,uint32_t flush_domains)795 bsd_ring_flush(struct intel_ring_buffer *ring,
796 	       uint32_t     invalidate_domains,
797 	       uint32_t     flush_domains)
798 {
799 	int ret;
800 
801 	ret = intel_ring_begin(ring, 2);
802 	if (ret)
803 		return ret;
804 
805 	intel_ring_emit(ring, MI_FLUSH);
806 	intel_ring_emit(ring, MI_NOOP);
807 	intel_ring_advance(ring);
808 	return 0;
809 }
810 
811 static int
ring_add_request(struct intel_ring_buffer * ring,uint32_t * result)812 ring_add_request(struct intel_ring_buffer *ring,
813 		 uint32_t *result)
814 {
815 	uint32_t seqno;
816 	int ret;
817 
818 	ret = intel_ring_begin(ring, 4);
819 	if (ret)
820 		return ret;
821 
822 	seqno = i915_gem_next_request_seqno(ring);
823 
824 	intel_ring_emit(ring, MI_STORE_DWORD_INDEX);
825 	intel_ring_emit(ring, I915_GEM_HWS_INDEX << MI_STORE_DWORD_INDEX_SHIFT);
826 	intel_ring_emit(ring, seqno);
827 	intel_ring_emit(ring, MI_USER_INTERRUPT);
828 	intel_ring_advance(ring);
829 
830 	*result = seqno;
831 	return 0;
832 }
833 
834 static bool
gen6_ring_get_irq(struct intel_ring_buffer * ring,uint32_t gflag,uint32_t rflag)835 gen6_ring_get_irq(struct intel_ring_buffer *ring, uint32_t gflag, uint32_t rflag)
836 {
837 	struct drm_device *dev = ring->dev;
838 	drm_i915_private_t *dev_priv = dev->dev_private;
839 
840 	if (!dev->irq_enabled)
841 	       return false;
842 
843 	gen6_gt_force_wake_get(dev_priv);
844 
845 	mtx_assert(&ring->irq_lock, MA_OWNED);
846 	if (ring->irq_refcount++ == 0) {
847 		ring->irq_mask &= ~rflag;
848 		I915_WRITE_IMR(ring, ring->irq_mask);
849 		ironlake_enable_irq(dev_priv, gflag);
850 	}
851 
852 	return true;
853 }
854 
855 static void
gen6_ring_put_irq(struct intel_ring_buffer * ring,uint32_t gflag,uint32_t rflag)856 gen6_ring_put_irq(struct intel_ring_buffer *ring, uint32_t gflag, uint32_t rflag)
857 {
858 	struct drm_device *dev = ring->dev;
859 	drm_i915_private_t *dev_priv = dev->dev_private;
860 
861 	mtx_assert(&ring->irq_lock, MA_OWNED);
862 	if (--ring->irq_refcount == 0) {
863 		ring->irq_mask |= rflag;
864 		I915_WRITE_IMR(ring, ring->irq_mask);
865 		ironlake_disable_irq(dev_priv, gflag);
866 	}
867 
868 	gen6_gt_force_wake_put(dev_priv);
869 }
870 
871 static bool
bsd_ring_get_irq(struct intel_ring_buffer * ring)872 bsd_ring_get_irq(struct intel_ring_buffer *ring)
873 {
874 	struct drm_device *dev = ring->dev;
875 	drm_i915_private_t *dev_priv = dev->dev_private;
876 
877 	if (!dev->irq_enabled)
878 		return false;
879 
880 	mtx_assert(&ring->irq_lock, MA_OWNED);
881 	if (ring->irq_refcount++ == 0) {
882 		if (IS_G4X(dev))
883 			i915_enable_irq(dev_priv, I915_BSD_USER_INTERRUPT);
884 		else
885 			ironlake_enable_irq(dev_priv, GT_BSD_USER_INTERRUPT);
886 	}
887 
888 	return true;
889 }
890 static void
bsd_ring_put_irq(struct intel_ring_buffer * ring)891 bsd_ring_put_irq(struct intel_ring_buffer *ring)
892 {
893 	struct drm_device *dev = ring->dev;
894 	drm_i915_private_t *dev_priv = dev->dev_private;
895 
896 	mtx_assert(&ring->irq_lock, MA_OWNED);
897 	if (--ring->irq_refcount == 0) {
898 		if (IS_G4X(dev))
899 			i915_disable_irq(dev_priv, I915_BSD_USER_INTERRUPT);
900 		else
901 			ironlake_disable_irq(dev_priv, GT_BSD_USER_INTERRUPT);
902 	}
903 }
904 
905 static int
ring_dispatch_execbuffer(struct intel_ring_buffer * ring,uint32_t offset,uint32_t length)906 ring_dispatch_execbuffer(struct intel_ring_buffer *ring, uint32_t offset,
907     uint32_t length)
908 {
909 	int ret;
910 
911 	ret = intel_ring_begin(ring, 2);
912 	if (ret)
913 		return ret;
914 
915 	intel_ring_emit(ring,
916 			MI_BATCH_BUFFER_START | (2 << 6) |
917 			MI_BATCH_NON_SECURE_I965);
918 	intel_ring_emit(ring, offset);
919 	intel_ring_advance(ring);
920 
921 	return 0;
922 }
923 
924 static int
render_ring_dispatch_execbuffer(struct intel_ring_buffer * ring,uint32_t offset,uint32_t len)925 render_ring_dispatch_execbuffer(struct intel_ring_buffer *ring,
926 				uint32_t offset, uint32_t len)
927 {
928 	struct drm_device *dev = ring->dev;
929 	int ret;
930 
931 	if (IS_I830(dev) || IS_845G(dev)) {
932 		ret = intel_ring_begin(ring, 4);
933 		if (ret)
934 			return ret;
935 
936 		intel_ring_emit(ring, MI_BATCH_BUFFER);
937 		intel_ring_emit(ring, offset | MI_BATCH_NON_SECURE);
938 		intel_ring_emit(ring, offset + len - 8);
939 		intel_ring_emit(ring, 0);
940 	} else {
941 		ret = intel_ring_begin(ring, 2);
942 		if (ret)
943 			return ret;
944 
945 		if (INTEL_INFO(dev)->gen >= 4) {
946 			intel_ring_emit(ring,
947 					MI_BATCH_BUFFER_START | (2 << 6) |
948 					MI_BATCH_NON_SECURE_I965);
949 			intel_ring_emit(ring, offset);
950 		} else {
951 			intel_ring_emit(ring,
952 					MI_BATCH_BUFFER_START | (2 << 6));
953 			intel_ring_emit(ring, offset | MI_BATCH_NON_SECURE);
954 		}
955 	}
956 	intel_ring_advance(ring);
957 
958 	return 0;
959 }
960 
cleanup_status_page(struct intel_ring_buffer * ring)961 static void cleanup_status_page(struct intel_ring_buffer *ring)
962 {
963 	drm_i915_private_t *dev_priv = ring->dev->dev_private;
964 	struct drm_i915_gem_object *obj;
965 
966 	obj = ring->status_page.obj;
967 	if (obj == NULL)
968 		return;
969 
970 	pmap_qremove((vm_offset_t)ring->status_page.page_addr, 1);
971 	kmem_free(kernel_map, (vm_offset_t)ring->status_page.page_addr,
972 	    PAGE_SIZE);
973 	i915_gem_object_unpin(obj);
974 	drm_gem_object_unreference(&obj->base);
975 	ring->status_page.obj = NULL;
976 
977 	memset(&dev_priv->hws_map, 0, sizeof(dev_priv->hws_map));
978 }
979 
init_status_page(struct intel_ring_buffer * ring)980 static int init_status_page(struct intel_ring_buffer *ring)
981 {
982 	struct drm_device *dev = ring->dev;
983 	drm_i915_private_t *dev_priv = dev->dev_private;
984 	struct drm_i915_gem_object *obj;
985 	int ret;
986 
987 	obj = i915_gem_alloc_object(dev, 4096);
988 	if (obj == NULL) {
989 		DRM_ERROR("Failed to allocate status page\n");
990 		ret = -ENOMEM;
991 		goto err;
992 	}
993 
994 	i915_gem_object_set_cache_level(obj, I915_CACHE_LLC);
995 
996 	ret = i915_gem_object_pin(obj, 4096, true);
997 	if (ret != 0) {
998 		goto err_unref;
999 	}
1000 
1001 	ring->status_page.gfx_addr = obj->gtt_offset;
1002 	ring->status_page.page_addr = (void *)kmem_alloc_nofault(kernel_map,
1003 	    PAGE_SIZE);
1004 	if (ring->status_page.page_addr == NULL) {
1005 		memset(&dev_priv->hws_map, 0, sizeof(dev_priv->hws_map));
1006 		goto err_unpin;
1007 	}
1008 	pmap_qenter((vm_offset_t)ring->status_page.page_addr, &obj->pages[0],
1009 	    1);
1010 	pmap_invalidate_cache_range((vm_offset_t)ring->status_page.page_addr,
1011 	    (vm_offset_t)ring->status_page.page_addr + PAGE_SIZE);
1012 	ring->status_page.obj = obj;
1013 	memset(ring->status_page.page_addr, 0, PAGE_SIZE);
1014 
1015 	intel_ring_setup_status_page(ring);
1016 	DRM_DEBUG("i915: init_status_page %s hws offset: 0x%08x\n",
1017 			ring->name, ring->status_page.gfx_addr);
1018 
1019 	return 0;
1020 
1021 err_unpin:
1022 	i915_gem_object_unpin(obj);
1023 err_unref:
1024 	drm_gem_object_unreference(&obj->base);
1025 err:
1026 	return ret;
1027 }
1028 
1029 static
intel_init_ring_buffer(struct drm_device * dev,struct intel_ring_buffer * ring)1030 int intel_init_ring_buffer(struct drm_device *dev,
1031 			   struct intel_ring_buffer *ring)
1032 {
1033 	struct drm_i915_gem_object *obj;
1034 	int ret;
1035 
1036 	ring->dev = dev;
1037 	INIT_LIST_HEAD(&ring->active_list);
1038 	INIT_LIST_HEAD(&ring->request_list);
1039 	INIT_LIST_HEAD(&ring->gpu_write_list);
1040 
1041 	mtx_init(&ring->irq_lock, "ringb", NULL, MTX_DEF);
1042 	ring->irq_mask = ~0;
1043 
1044 	if (I915_NEED_GFX_HWS(dev)) {
1045 		ret = init_status_page(ring);
1046 		if (ret)
1047 			return ret;
1048 	}
1049 
1050 	obj = i915_gem_alloc_object(dev, ring->size);
1051 	if (obj == NULL) {
1052 		DRM_ERROR("Failed to allocate ringbuffer\n");
1053 		ret = -ENOMEM;
1054 		goto err_hws;
1055 	}
1056 
1057 	ring->obj = obj;
1058 
1059 	ret = i915_gem_object_pin(obj, PAGE_SIZE, true);
1060 	if (ret)
1061 		goto err_unref;
1062 
1063 	ring->map.size = ring->size;
1064 	ring->map.offset = dev->agp->base + obj->gtt_offset;
1065 	ring->map.type = 0;
1066 	ring->map.flags = 0;
1067 	ring->map.mtrr = 0;
1068 
1069 	drm_core_ioremap_wc(&ring->map, dev);
1070 	if (ring->map.virtual == NULL) {
1071 		DRM_ERROR("Failed to map ringbuffer.\n");
1072 		ret = -EINVAL;
1073 		goto err_unpin;
1074 	}
1075 
1076 	ring->virtual_start = ring->map.virtual;
1077 	ret = ring->init(ring);
1078 	if (ret)
1079 		goto err_unmap;
1080 
1081 	/* Workaround an erratum on the i830 which causes a hang if
1082 	 * the TAIL pointer points to within the last 2 cachelines
1083 	 * of the buffer.
1084 	 */
1085 	ring->effective_size = ring->size;
1086 	if (IS_I830(ring->dev) || IS_845G(ring->dev))
1087 		ring->effective_size -= 128;
1088 
1089 	return 0;
1090 
1091 err_unmap:
1092 	drm_core_ioremapfree(&ring->map, dev);
1093 err_unpin:
1094 	i915_gem_object_unpin(obj);
1095 err_unref:
1096 	drm_gem_object_unreference(&obj->base);
1097 	ring->obj = NULL;
1098 err_hws:
1099 	cleanup_status_page(ring);
1100 	return ret;
1101 }
1102 
intel_cleanup_ring_buffer(struct intel_ring_buffer * ring)1103 void intel_cleanup_ring_buffer(struct intel_ring_buffer *ring)
1104 {
1105 	struct drm_i915_private *dev_priv;
1106 	int ret;
1107 
1108 	if (ring->obj == NULL)
1109 		return;
1110 
1111 	/* Disable the ring buffer. The ring must be idle at this point */
1112 	dev_priv = ring->dev->dev_private;
1113 	ret = intel_wait_ring_idle(ring);
1114 	I915_WRITE_CTL(ring, 0);
1115 
1116 	drm_core_ioremapfree(&ring->map, ring->dev);
1117 
1118 	i915_gem_object_unpin(ring->obj);
1119 	drm_gem_object_unreference(&ring->obj->base);
1120 	ring->obj = NULL;
1121 
1122 	if (ring->cleanup)
1123 		ring->cleanup(ring);
1124 
1125 	cleanup_status_page(ring);
1126 }
1127 
intel_wrap_ring_buffer(struct intel_ring_buffer * ring)1128 static int intel_wrap_ring_buffer(struct intel_ring_buffer *ring)
1129 {
1130 	unsigned int *virt;
1131 	int rem = ring->size - ring->tail;
1132 
1133 	if (ring->space < rem) {
1134 		int ret = intel_wait_ring_buffer(ring, rem);
1135 		if (ret)
1136 			return ret;
1137 	}
1138 
1139 	virt = (unsigned int *)((char *)ring->virtual_start + ring->tail);
1140 	rem /= 8;
1141 	while (rem--) {
1142 		*virt++ = MI_NOOP;
1143 		*virt++ = MI_NOOP;
1144 	}
1145 
1146 	ring->tail = 0;
1147 	ring->space = ring_space(ring);
1148 
1149 	return 0;
1150 }
1151 
/*
 * Wait for the given seqno on a ring with signal interruption disabled.
 *
 * The previous value of dev_priv->mm.interruptible is restored before
 * returning.  Returns whatever i915_wait_request() returns.
 */
static int intel_ring_wait_seqno(struct intel_ring_buffer *ring, u32 seqno)
{
	struct drm_i915_private *dev_priv = ring->dev->dev_private;
	const bool saved_interruptible = dev_priv->mm.interruptible;
	int error;

	/*
	 * XXX Not every path has been audited for handling ERESTARTSYS
	 * coming out of intel_ring_begin, so the wait must not be
	 * interruptible by a signal.
	 */
	dev_priv->mm.interruptible = false;
	error = i915_wait_request(ring, seqno, true);
	dev_priv->mm.interruptible = saved_interruptible;

	return error;
}
1171 
intel_ring_wait_request(struct intel_ring_buffer * ring,int n)1172 static int intel_ring_wait_request(struct intel_ring_buffer *ring, int n)
1173 {
1174 	struct drm_i915_gem_request *request;
1175 	u32 seqno = 0;
1176 	int ret;
1177 
1178 	i915_gem_retire_requests_ring(ring);
1179 
1180 	if (ring->last_retired_head != -1) {
1181 		ring->head = ring->last_retired_head;
1182 		ring->last_retired_head = -1;
1183 		ring->space = ring_space(ring);
1184 		if (ring->space >= n)
1185 			return 0;
1186 	}
1187 
1188 	list_for_each_entry(request, &ring->request_list, list) {
1189 		int space;
1190 
1191 		if (request->tail == -1)
1192 			continue;
1193 
1194 		space = request->tail - (ring->tail + 8);
1195 		if (space < 0)
1196 			space += ring->size;
1197 		if (space >= n) {
1198 			seqno = request->seqno;
1199 			break;
1200 		}
1201 
1202 		/* Consume this request in case we need more space than
1203 		 * is available and so need to prevent a race between
1204 		 * updating last_retired_head and direct reads of
1205 		 * I915_RING_HEAD. It also provides a nice sanity check.
1206 		 */
1207 		request->tail = -1;
1208 	}
1209 
1210 	if (seqno == 0)
1211 		return -ENOSPC;
1212 
1213 	ret = intel_ring_wait_seqno(ring, seqno);
1214 	if (ret)
1215 		return ret;
1216 
1217 	if (ring->last_retired_head == -1)
1218 		return -ENOSPC;
1219 
1220 	ring->head = ring->last_retired_head;
1221 	ring->last_retired_head = -1;
1222 	ring->space = ring_space(ring);
1223 	if (ring->space < n)
1224 		return -ENOSPC;
1225 
1226 	return 0;
1227 }
1228 
intel_wait_ring_buffer(struct intel_ring_buffer * ring,int n)1229 int intel_wait_ring_buffer(struct intel_ring_buffer *ring, int n)
1230 {
1231 	struct drm_device *dev = ring->dev;
1232 	struct drm_i915_private *dev_priv = dev->dev_private;
1233 	int end;
1234 	int ret;
1235 
1236 	ret = intel_ring_wait_request(ring, n);
1237 	if (ret != -ENOSPC)
1238 		return ret;
1239 
1240 	CTR1(KTR_DRM, "ring_wait_begin %s", ring->name);
1241 	if (drm_core_check_feature(dev, DRIVER_GEM))
1242 		/* With GEM the hangcheck timer should kick us out of the loop,
1243 		 * leaving it early runs the risk of corrupting GEM state (due
1244 		 * to running on almost untested codepaths). But on resume
1245 		 * timers don't work yet, so prevent a complete hang in that
1246 		 * case by choosing an insanely large timeout. */
1247 		end = ticks + hz * 60;
1248 	else
1249 		end = ticks + hz * 3;
1250 	do {
1251 		ring->head = I915_READ_HEAD(ring);
1252 		ring->space = ring_space(ring);
1253 		if (ring->space >= n) {
1254 			CTR1(KTR_DRM, "ring_wait_end %s", ring->name);
1255 			return 0;
1256 		}
1257 
1258 #if 0
1259 		if (dev->primary->master) {
1260 			struct drm_i915_master_private *master_priv = dev->primary->master->driver_priv;
1261 			if (master_priv->sarea_priv)
1262 				master_priv->sarea_priv->perf_boxes |= I915_BOX_WAIT;
1263 		}
1264 #else
1265 		if (dev_priv->sarea_priv)
1266 			dev_priv->sarea_priv->perf_boxes |= I915_BOX_WAIT;
1267 #endif
1268 
1269 		pause("915rng", 1);
1270 		if (atomic_load_acq_32(&dev_priv->mm.wedged) != 0) {
1271 			CTR1(KTR_DRM, "ring_wait_end %s wedged", ring->name);
1272 			return -EAGAIN;
1273 		}
1274 	} while (!time_after(ticks, end));
1275 	CTR1(KTR_DRM, "ring_wait_end %s busy", ring->name);
1276 	return -EBUSY;
1277 }
1278 
intel_ring_begin(struct intel_ring_buffer * ring,int num_dwords)1279 int intel_ring_begin(struct intel_ring_buffer *ring,
1280 		     int num_dwords)
1281 {
1282 	struct drm_i915_private *dev_priv = ring->dev->dev_private;
1283 	int n = 4*num_dwords;
1284 	int ret;
1285 
1286 	if (atomic_load_acq_int(&dev_priv->mm.wedged))
1287 		return -EIO;
1288 
1289 	if (ring->tail + n > ring->effective_size) {
1290 		ret = intel_wrap_ring_buffer(ring);
1291 		if (ret != 0)
1292 			return ret;
1293 	}
1294 
1295 	if (ring->space < n) {
1296 		ret = intel_wait_ring_buffer(ring, n);
1297 		if (ret != 0)
1298 			return ret;
1299 	}
1300 
1301 	ring->space -= n;
1302 	return 0;
1303 }
1304 
intel_ring_advance(struct intel_ring_buffer * ring)1305 void intel_ring_advance(struct intel_ring_buffer *ring)
1306 {
1307 	ring->tail &= ring->size - 1;
1308 	ring->write_tail(ring, ring->tail);
1309 }
1310 
1311 static const struct intel_ring_buffer render_ring = {
1312 	.name			= "render ring",
1313 	.id			= RCS,
1314 	.mmio_base		= RENDER_RING_BASE,
1315 	.size			= 32 * PAGE_SIZE,
1316 	.init			= init_render_ring,
1317 	.write_tail		= ring_write_tail,
1318 	.flush			= render_ring_flush,
1319 	.add_request		= render_ring_add_request,
1320 	.get_seqno		= ring_get_seqno,
1321 	.irq_get		= render_ring_get_irq,
1322 	.irq_put		= render_ring_put_irq,
1323 	.dispatch_execbuffer	= render_ring_dispatch_execbuffer,
1324 	.cleanup		= render_ring_cleanup,
1325 	.sync_to		= render_ring_sync_to,
1326 	.semaphore_register	= {MI_SEMAPHORE_SYNC_INVALID,
1327 				   MI_SEMAPHORE_SYNC_RV,
1328 				   MI_SEMAPHORE_SYNC_RB},
1329 	.signal_mbox		= {GEN6_VRSYNC, GEN6_BRSYNC},
1330 };
1331 
1332 /* ring buffer for bit-stream decoder */
1333 
1334 static const struct intel_ring_buffer bsd_ring = {
1335 	.name                   = "bsd ring",
1336 	.id			= VCS,
1337 	.mmio_base		= BSD_RING_BASE,
1338 	.size			= 32 * PAGE_SIZE,
1339 	.init			= init_ring_common,
1340 	.write_tail		= ring_write_tail,
1341 	.flush			= bsd_ring_flush,
1342 	.add_request		= ring_add_request,
1343 	.get_seqno		= ring_get_seqno,
1344 	.irq_get		= bsd_ring_get_irq,
1345 	.irq_put		= bsd_ring_put_irq,
1346 	.dispatch_execbuffer	= ring_dispatch_execbuffer,
1347 };
1348 
1349 
/*
 * write_tail hook for the GEN6 BSD ring.
 *
 * The tail register is only written after the RC ILDL message has been
 * disabled and the idle indicator has been polled for; the message is
 * re-enabled afterwards.  A timeout while polling is logged and the tail
 * is written anyway.
 */
static void
gen6_bsd_ring_write_tail(struct intel_ring_buffer *ring, uint32_t value)
{
	/* dev_priv is required by the I915_READ()/I915_WRITE() macros. */
	drm_i915_private_t *dev_priv = ring->dev->dev_private;
	int error;

	/* Every tail move must follow the sequence below */
	I915_WRITE(GEN6_BSD_SLEEP_PSMI_CONTROL,
	    GEN6_BSD_SLEEP_PSMI_CONTROL_RC_ILDL_MESSAGE_MODIFY_MASK |
	    GEN6_BSD_SLEEP_PSMI_CONTROL_RC_ILDL_MESSAGE_DISABLE);
	I915_WRITE(GEN6_BSD_RNCID, 0x0);

	error = _intel_wait_for(ring->dev,
	    (I915_READ(GEN6_BSD_SLEEP_PSMI_CONTROL) &
	     GEN6_BSD_SLEEP_PSMI_CONTROL_IDLE_INDICATOR) == 0, 50,
	    true, "915g6i");
	if (error != 0)
		DRM_ERROR("timed out waiting for IDLE Indicator\n");

	I915_WRITE_TAIL(ring, value);
	I915_WRITE(GEN6_BSD_SLEEP_PSMI_CONTROL,
	    GEN6_BSD_SLEEP_PSMI_CONTROL_RC_ILDL_MESSAGE_MODIFY_MASK |
	    GEN6_BSD_SLEEP_PSMI_CONTROL_RC_ILDL_MESSAGE_ENABLE);
}
1372 
/*
 * Emit an MI_FLUSH_DW on the ring.
 *
 * invalidate: when any I915_GEM_GPU_DOMAINS bit is set, the TLB and BSD
 *             invalidate flags are added to the command.
 * flush:      not consulted by this implementation.
 *
 * Returns 0 on success or the error from intel_ring_begin().
 */
static int
gen6_ring_flush(struct intel_ring_buffer *ring, uint32_t invalidate,
    uint32_t flush)
{
	uint32_t flush_cmd = MI_FLUSH_DW;
	int error;

	error = intel_ring_begin(ring, 4);
	if (error != 0)
		return error;

	if ((invalidate & I915_GEM_GPU_DOMAINS) != 0)
		flush_cmd |= MI_INVALIDATE_TLB | MI_INVALIDATE_BSD;

	intel_ring_emit(ring, flush_cmd);
	intel_ring_emit(ring, 0);
	intel_ring_emit(ring, 0);
	intel_ring_emit(ring, MI_NOOP);
	intel_ring_advance(ring);

	return 0;
}
1393 
1394 static int
gen6_ring_dispatch_execbuffer(struct intel_ring_buffer * ring,uint32_t offset,uint32_t len)1395 gen6_ring_dispatch_execbuffer(struct intel_ring_buffer *ring,
1396 			      uint32_t offset, uint32_t len)
1397 {
1398 	int ret;
1399 
1400 	ret = intel_ring_begin(ring, 2);
1401 	if (ret)
1402 		return ret;
1403 
1404 	intel_ring_emit(ring, MI_BATCH_BUFFER_START | MI_BATCH_NON_SECURE_I965);
1405 	/* bit0-7 is the length on GEN6+ */
1406 	intel_ring_emit(ring, offset);
1407 	intel_ring_advance(ring);
1408 
1409 	return 0;
1410 }
1411 
1412 static bool
gen6_render_ring_get_irq(struct intel_ring_buffer * ring)1413 gen6_render_ring_get_irq(struct intel_ring_buffer *ring)
1414 {
1415 	return gen6_ring_get_irq(ring,
1416 				 GT_USER_INTERRUPT,
1417 				 GEN6_RENDER_USER_INTERRUPT);
1418 }
1419 
1420 static void
gen6_render_ring_put_irq(struct intel_ring_buffer * ring)1421 gen6_render_ring_put_irq(struct intel_ring_buffer *ring)
1422 {
1423 	return gen6_ring_put_irq(ring,
1424 				 GT_USER_INTERRUPT,
1425 				 GEN6_RENDER_USER_INTERRUPT);
1426 }
1427 
1428 static bool
gen6_bsd_ring_get_irq(struct intel_ring_buffer * ring)1429 gen6_bsd_ring_get_irq(struct intel_ring_buffer *ring)
1430 {
1431 	return gen6_ring_get_irq(ring,
1432 				 GT_GEN6_BSD_USER_INTERRUPT,
1433 				 GEN6_BSD_USER_INTERRUPT);
1434 }
1435 
1436 static void
gen6_bsd_ring_put_irq(struct intel_ring_buffer * ring)1437 gen6_bsd_ring_put_irq(struct intel_ring_buffer *ring)
1438 {
1439 	return gen6_ring_put_irq(ring,
1440 				 GT_GEN6_BSD_USER_INTERRUPT,
1441 				 GEN6_BSD_USER_INTERRUPT);
1442 }
1443 
1444 /* ring buffer for Video Codec for Gen6+ */
1445 static const struct intel_ring_buffer gen6_bsd_ring = {
1446 	.name			= "gen6 bsd ring",
1447 	.id			= VCS,
1448 	.mmio_base		= GEN6_BSD_RING_BASE,
1449 	.size			= 32 * PAGE_SIZE,
1450 	.init			= init_ring_common,
1451 	.write_tail		= gen6_bsd_ring_write_tail,
1452 	.flush			= gen6_ring_flush,
1453 	.add_request		= gen6_add_request,
1454 	.get_seqno		= gen6_ring_get_seqno,
1455 	.irq_get		= gen6_bsd_ring_get_irq,
1456 	.irq_put		= gen6_bsd_ring_put_irq,
1457 	.dispatch_execbuffer	= gen6_ring_dispatch_execbuffer,
1458 	.sync_to		= gen6_bsd_ring_sync_to,
1459 	.semaphore_register	= {MI_SEMAPHORE_SYNC_VR,
1460 				   MI_SEMAPHORE_SYNC_INVALID,
1461 				   MI_SEMAPHORE_SYNC_VB},
1462 	.signal_mbox		= {GEN6_RVSYNC, GEN6_BVSYNC},
1463 };
1464 
1465 /* Blitter support (SandyBridge+) */
1466 
1467 static bool
blt_ring_get_irq(struct intel_ring_buffer * ring)1468 blt_ring_get_irq(struct intel_ring_buffer *ring)
1469 {
1470 	return gen6_ring_get_irq(ring,
1471 				 GT_BLT_USER_INTERRUPT,
1472 				 GEN6_BLITTER_USER_INTERRUPT);
1473 }
1474 
1475 static void
blt_ring_put_irq(struct intel_ring_buffer * ring)1476 blt_ring_put_irq(struct intel_ring_buffer *ring)
1477 {
1478 	gen6_ring_put_irq(ring,
1479 			  GT_BLT_USER_INTERRUPT,
1480 			  GEN6_BLITTER_USER_INTERRUPT);
1481 }
1482 
/*
 * Emit an MI_FLUSH_DW on the blitter ring.
 *
 * invalidate: when I915_GEM_DOMAIN_RENDER is set, the TLB invalidate flag
 *             is added to the command.
 * flush:      not consulted by this implementation.
 *
 * Returns 0 on success or the error from intel_ring_begin().
 */
static int
blt_ring_flush(struct intel_ring_buffer *ring, uint32_t invalidate,
    uint32_t flush)
{
	uint32_t flush_cmd = MI_FLUSH_DW;
	int error;

	error = intel_ring_begin(ring, 4);
	if (error != 0)
		return error;

	if ((invalidate & I915_GEM_DOMAIN_RENDER) != 0)
		flush_cmd |= MI_INVALIDATE_TLB;

	intel_ring_emit(ring, flush_cmd);
	intel_ring_emit(ring, 0);
	intel_ring_emit(ring, 0);
	intel_ring_emit(ring, MI_NOOP);
	intel_ring_advance(ring);

	return 0;
}
1503 
1504 static const struct intel_ring_buffer gen6_blt_ring = {
1505 	.name			= "blt ring",
1506 	.id			= BCS,
1507 	.mmio_base		= BLT_RING_BASE,
1508 	.size			= 32 * PAGE_SIZE,
1509 	.init			= init_ring_common,
1510 	.write_tail		= ring_write_tail,
1511 	.flush			= blt_ring_flush,
1512 	.add_request		= gen6_add_request,
1513 	.get_seqno		= gen6_ring_get_seqno,
1514 	.irq_get		= blt_ring_get_irq,
1515 	.irq_put		= blt_ring_put_irq,
1516 	.dispatch_execbuffer	= gen6_ring_dispatch_execbuffer,
1517 	.sync_to		= gen6_blt_ring_sync_to,
1518 	.semaphore_register	= {MI_SEMAPHORE_SYNC_BR,
1519 				   MI_SEMAPHORE_SYNC_BV,
1520 				   MI_SEMAPHORE_SYNC_INVALID},
1521 	.signal_mbox		= {GEN6_RBSYNC, GEN6_VBSYNC},
1522 };
1523 
intel_init_render_ring_buffer(struct drm_device * dev)1524 int intel_init_render_ring_buffer(struct drm_device *dev)
1525 {
1526 	drm_i915_private_t *dev_priv = dev->dev_private;
1527 	struct intel_ring_buffer *ring = &dev_priv->rings[RCS];
1528 
1529 	*ring = render_ring;
1530 	if (INTEL_INFO(dev)->gen >= 6) {
1531 		ring->add_request = gen6_add_request;
1532 		ring->flush = gen6_render_ring_flush;
1533 		ring->irq_get = gen6_render_ring_get_irq;
1534 		ring->irq_put = gen6_render_ring_put_irq;
1535 		ring->get_seqno = gen6_ring_get_seqno;
1536 	} else if (IS_GEN5(dev)) {
1537 		ring->add_request = pc_render_add_request;
1538 		ring->get_seqno = pc_render_get_seqno;
1539 	}
1540 
1541 	if (!I915_NEED_GFX_HWS(dev)) {
1542 		ring->status_page.page_addr = dev_priv->status_page_dmah->vaddr;
1543 		memset(ring->status_page.page_addr, 0, PAGE_SIZE);
1544 	}
1545 
1546 	return intel_init_ring_buffer(dev, ring);
1547 }
1548 
/*
 * Set up the render ring over a caller-supplied buffer.
 *
 * dev:   DRM device.
 * start: value stored as the map offset of the ring buffer.
 * size:  ring size in bytes; also used as the map size.
 *
 * The ring starts from the render_ring template with generation-specific
 * hooks swapped in, and is mapped write-combined.  No GEM object or status
 * page is allocated here.
 *
 * Returns 0 on success or -ENOMEM when the mapping fails.
 */
int intel_render_ring_init_dri(struct drm_device *dev, uint64_t start,
    uint32_t size)
{
	drm_i915_private_t *dev_priv = dev->dev_private;
	struct intel_ring_buffer *ring = &dev_priv->rings[RCS];

	*ring = render_ring;
	if (INTEL_INFO(dev)->gen >= 6) {
		ring->add_request = gen6_add_request;
		ring->irq_get = gen6_render_ring_get_irq;
		ring->irq_put = gen6_render_ring_put_irq;
	} else if (IS_GEN5(dev)) {
		ring->add_request = pc_render_add_request;
		ring->get_seqno = pc_render_get_seqno;
	}

	ring->dev = dev;
	INIT_LIST_HEAD(&ring->active_list);
	INIT_LIST_HEAD(&ring->request_list);
	INIT_LIST_HEAD(&ring->gpu_write_list);

	/*
	 * Keep the TAIL pointer out of the last 128 bytes on the same
	 * devices as intel_init_ring_buffer() does (I830 and 845G), so both
	 * init paths apply the workaround consistently.
	 */
	ring->size = size;
	ring->effective_size = ring->size;
	if (IS_I830(ring->dev) || IS_845G(ring->dev))
		ring->effective_size -= 128;

	ring->map.offset = start;
	ring->map.size = size;
	ring->map.type = 0;
	ring->map.flags = 0;
	ring->map.mtrr = 0;

	drm_core_ioremap_wc(&ring->map, dev);
	if (ring->map.virtual == NULL) {
		DRM_ERROR("can not ioremap virtual address for"
			  " ring buffer\n");
		return -ENOMEM;
	}

	ring->virtual_start = (void *)ring->map.virtual;
	return 0;
}
1591 
intel_init_bsd_ring_buffer(struct drm_device * dev)1592 int intel_init_bsd_ring_buffer(struct drm_device *dev)
1593 {
1594 	drm_i915_private_t *dev_priv = dev->dev_private;
1595 	struct intel_ring_buffer *ring = &dev_priv->rings[VCS];
1596 
1597 	if (IS_GEN6(dev) || IS_GEN7(dev))
1598 		*ring = gen6_bsd_ring;
1599 	else
1600 		*ring = bsd_ring;
1601 
1602 	return intel_init_ring_buffer(dev, ring);
1603 }
1604 
intel_init_blt_ring_buffer(struct drm_device * dev)1605 int intel_init_blt_ring_buffer(struct drm_device *dev)
1606 {
1607 	drm_i915_private_t *dev_priv = dev->dev_private;
1608 	struct intel_ring_buffer *ring = &dev_priv->rings[BCS];
1609 
1610 	*ring = gen6_blt_ring;
1611 
1612 	return intel_init_ring_buffer(dev, ring);
1613 }
1614