1 /*
2 * Copyright © 2008-2010 Intel Corporation
3 *
4 * Permission is hereby granted, free of charge, to any person obtaining a
5 * copy of this software and associated documentation files (the "Software"),
6 * to deal in the Software without restriction, including without limitation
7 * the rights to use, copy, modify, merge, publish, distribute, sublicense,
8 * and/or sell copies of the Software, and to permit persons to whom the
9 * Software is furnished to do so, subject to the following conditions:
10 *
11 * The above copyright notice and this permission notice (including the next
12 * paragraph) shall be included in all copies or substantial portions of the
13 * Software.
14 *
15 * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
16 * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
17 * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL
18 * THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
19 * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING
20 * FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS
21 * IN THE SOFTWARE.
22 *
23 * Authors:
24 * Eric Anholt <eric@anholt.net>
25 * Zou Nan hai <nanhai.zou@intel.com>
26 * Xiang Hai hao<haihao.xiang@intel.com>
27 *
28 */
29
30 #include <sys/cdefs.h>
31 __FBSDID("$FreeBSD: stable/9/sys/dev/drm2/i915/intel_ringbuffer.c 254023 2013-08-07 05:55:02Z kib $");
32
33 #include <dev/drm2/drmP.h>
34 #include <dev/drm2/drm.h>
35 #include <dev/drm2/i915/i915_drm.h>
36 #include <dev/drm2/i915/i915_drv.h>
37 #include <dev/drm2/i915/intel_drv.h>
38 #include <dev/drm2/i915/intel_ringbuffer.h>
39 #include <sys/sched.h>
40 #include <sys/sf_buf.h>
41
42 /*
43 * 965+ support PIPE_CONTROL commands, which provide finer grained control
44 * over cache flushing.
45 */
46 struct pipe_control {
47 struct drm_i915_gem_object *obj;
48 volatile u32 *cpu_page;
49 u32 gtt_offset;
50 };
51
52 void
i915_trace_irq_get(struct intel_ring_buffer * ring,uint32_t seqno)53 i915_trace_irq_get(struct intel_ring_buffer *ring, uint32_t seqno)
54 {
55
56 if (ring->trace_irq_seqno == 0) {
57 mtx_lock(&ring->irq_lock);
58 if (ring->irq_get(ring))
59 ring->trace_irq_seqno = seqno;
60 mtx_unlock(&ring->irq_lock);
61 }
62 }
63
ring_space(struct intel_ring_buffer * ring)64 static inline int ring_space(struct intel_ring_buffer *ring)
65 {
66 int space = (ring->head & HEAD_ADDR) - (ring->tail + 8);
67 if (space < 0)
68 space += ring->size;
69 return space;
70 }
71
72 static int
render_ring_flush(struct intel_ring_buffer * ring,uint32_t invalidate_domains,uint32_t flush_domains)73 render_ring_flush(struct intel_ring_buffer *ring,
74 uint32_t invalidate_domains,
75 uint32_t flush_domains)
76 {
77 struct drm_device *dev = ring->dev;
78 uint32_t cmd;
79 int ret;
80
81 /*
82 * read/write caches:
83 *
84 * I915_GEM_DOMAIN_RENDER is always invalidated, but is
85 * only flushed if MI_NO_WRITE_FLUSH is unset. On 965, it is
86 * also flushed at 2d versus 3d pipeline switches.
87 *
88 * read-only caches:
89 *
90 * I915_GEM_DOMAIN_SAMPLER is flushed on pre-965 if
91 * MI_READ_FLUSH is set, and is always flushed on 965.
92 *
93 * I915_GEM_DOMAIN_COMMAND may not exist?
94 *
95 * I915_GEM_DOMAIN_INSTRUCTION, which exists on 965, is
96 * invalidated when MI_EXE_FLUSH is set.
97 *
98 * I915_GEM_DOMAIN_VERTEX, which exists on 965, is
99 * invalidated with every MI_FLUSH.
100 *
101 * TLBs:
102 *
103 * On 965, TLBs associated with I915_GEM_DOMAIN_COMMAND
104 * and I915_GEM_DOMAIN_CPU in are invalidated at PTE write and
105 * I915_GEM_DOMAIN_RENDER and I915_GEM_DOMAIN_SAMPLER
106 * are flushed at any MI_FLUSH.
107 */
108
109 cmd = MI_FLUSH | MI_NO_WRITE_FLUSH;
110 if ((invalidate_domains|flush_domains) &
111 I915_GEM_DOMAIN_RENDER)
112 cmd &= ~MI_NO_WRITE_FLUSH;
113 if (INTEL_INFO(dev)->gen < 4) {
114 /*
115 * On the 965, the sampler cache always gets flushed
116 * and this bit is reserved.
117 */
118 if (invalidate_domains & I915_GEM_DOMAIN_SAMPLER)
119 cmd |= MI_READ_FLUSH;
120 }
121 if (invalidate_domains & I915_GEM_DOMAIN_INSTRUCTION)
122 cmd |= MI_EXE_FLUSH;
123
124 if (invalidate_domains & I915_GEM_DOMAIN_COMMAND &&
125 (IS_G4X(dev) || IS_GEN5(dev)))
126 cmd |= MI_INVALIDATE_ISP;
127
128 ret = intel_ring_begin(ring, 2);
129 if (ret)
130 return ret;
131
132 intel_ring_emit(ring, cmd);
133 intel_ring_emit(ring, MI_NOOP);
134 intel_ring_advance(ring);
135
136 return 0;
137 }
138
139 /**
140 * Emits a PIPE_CONTROL with a non-zero post-sync operation, for
141 * implementing two workarounds on gen6. From section 1.4.7.1
142 * "PIPE_CONTROL" of the Sandy Bridge PRM volume 2 part 1:
143 *
144 * [DevSNB-C+{W/A}] Before any depth stall flush (including those
145 * produced by non-pipelined state commands), software needs to first
146 * send a PIPE_CONTROL with no bits set except Post-Sync Operation !=
147 * 0.
148 *
149 * [Dev-SNB{W/A}]: Before a PIPE_CONTROL with Write Cache Flush Enable
150 * =1, a PIPE_CONTROL with any non-zero post-sync-op is required.
151 *
152 * And the workaround for these two requires this workaround first:
153 *
154 * [Dev-SNB{W/A}]: Pipe-control with CS-stall bit set must be sent
155 * BEFORE the pipe-control with a post-sync op and no write-cache
156 * flushes.
157 *
158 * And this last workaround is tricky because of the requirements on
159 * that bit. From section 1.4.7.2.3 "Stall" of the Sandy Bridge PRM
160 * volume 2 part 1:
161 *
162 * "1 of the following must also be set:
163 * - Render Target Cache Flush Enable ([12] of DW1)
164 * - Depth Cache Flush Enable ([0] of DW1)
165 * - Stall at Pixel Scoreboard ([1] of DW1)
166 * - Depth Stall ([13] of DW1)
167 * - Post-Sync Operation ([13] of DW1)
168 * - Notify Enable ([8] of DW1)"
169 *
170 * The cache flushes require the workaround flush that triggered this
171 * one, so we can't use it. Depth stall would trigger the same.
172 * Post-sync nonzero is what triggered this second workaround, so we
173 * can't use that one either. Notify enable is IRQs, which aren't
174 * really our business. That leaves only stall at scoreboard.
175 */
176 static int
intel_emit_post_sync_nonzero_flush(struct intel_ring_buffer * ring)177 intel_emit_post_sync_nonzero_flush(struct intel_ring_buffer *ring)
178 {
179 struct pipe_control *pc = ring->private;
180 u32 scratch_addr = pc->gtt_offset + 128;
181 int ret;
182
183
184 ret = intel_ring_begin(ring, 6);
185 if (ret)
186 return ret;
187
188 intel_ring_emit(ring, GFX_OP_PIPE_CONTROL(5));
189 intel_ring_emit(ring, PIPE_CONTROL_CS_STALL |
190 PIPE_CONTROL_STALL_AT_SCOREBOARD);
191 intel_ring_emit(ring, scratch_addr | PIPE_CONTROL_GLOBAL_GTT); /* address */
192 intel_ring_emit(ring, 0); /* low dword */
193 intel_ring_emit(ring, 0); /* high dword */
194 intel_ring_emit(ring, MI_NOOP);
195 intel_ring_advance(ring);
196
197 ret = intel_ring_begin(ring, 6);
198 if (ret)
199 return ret;
200
201 intel_ring_emit(ring, GFX_OP_PIPE_CONTROL(5));
202 intel_ring_emit(ring, PIPE_CONTROL_QW_WRITE);
203 intel_ring_emit(ring, scratch_addr | PIPE_CONTROL_GLOBAL_GTT); /* address */
204 intel_ring_emit(ring, 0);
205 intel_ring_emit(ring, 0);
206 intel_ring_emit(ring, MI_NOOP);
207 intel_ring_advance(ring);
208
209 return 0;
210 }
211
212 static int
gen6_render_ring_flush(struct intel_ring_buffer * ring,u32 invalidate_domains,u32 flush_domains)213 gen6_render_ring_flush(struct intel_ring_buffer *ring,
214 u32 invalidate_domains, u32 flush_domains)
215 {
216 u32 flags = 0;
217 struct pipe_control *pc = ring->private;
218 u32 scratch_addr = pc->gtt_offset + 128;
219 int ret;
220
221 /* Force SNB workarounds for PIPE_CONTROL flushes */
222 intel_emit_post_sync_nonzero_flush(ring);
223
224 /* Just flush everything. Experiments have shown that reducing the
225 * number of bits based on the write domains has little performance
226 * impact.
227 */
228 flags |= PIPE_CONTROL_RENDER_TARGET_CACHE_FLUSH;
229 flags |= PIPE_CONTROL_INSTRUCTION_CACHE_INVALIDATE;
230 flags |= PIPE_CONTROL_TEXTURE_CACHE_INVALIDATE;
231 flags |= PIPE_CONTROL_DEPTH_CACHE_FLUSH;
232 flags |= PIPE_CONTROL_VF_CACHE_INVALIDATE;
233 flags |= PIPE_CONTROL_CONST_CACHE_INVALIDATE;
234 flags |= PIPE_CONTROL_STATE_CACHE_INVALIDATE;
235
236 ret = intel_ring_begin(ring, 6);
237 if (ret)
238 return ret;
239
240 intel_ring_emit(ring, GFX_OP_PIPE_CONTROL(5));
241 intel_ring_emit(ring, flags);
242 intel_ring_emit(ring, scratch_addr | PIPE_CONTROL_GLOBAL_GTT);
243 intel_ring_emit(ring, 0); /* lower dword */
244 intel_ring_emit(ring, 0); /* uppwer dword */
245 intel_ring_emit(ring, MI_NOOP);
246 intel_ring_advance(ring);
247
248 return 0;
249 }
250
ring_write_tail(struct intel_ring_buffer * ring,uint32_t value)251 static void ring_write_tail(struct intel_ring_buffer *ring,
252 uint32_t value)
253 {
254 drm_i915_private_t *dev_priv = ring->dev->dev_private;
255 I915_WRITE_TAIL(ring, value);
256 }
257
intel_ring_get_active_head(struct intel_ring_buffer * ring)258 u32 intel_ring_get_active_head(struct intel_ring_buffer *ring)
259 {
260 drm_i915_private_t *dev_priv = ring->dev->dev_private;
261 uint32_t acthd_reg = INTEL_INFO(ring->dev)->gen >= 4 ?
262 RING_ACTHD(ring->mmio_base) : ACTHD;
263
264 return I915_READ(acthd_reg);
265 }
266
init_ring_common(struct intel_ring_buffer * ring)267 static int init_ring_common(struct intel_ring_buffer *ring)
268 {
269 drm_i915_private_t *dev_priv = ring->dev->dev_private;
270 struct drm_i915_gem_object *obj = ring->obj;
271 uint32_t head;
272
273 /* Stop the ring if it's running. */
274 I915_WRITE_CTL(ring, 0);
275 I915_WRITE_HEAD(ring, 0);
276 ring->write_tail(ring, 0);
277
278 /* Initialize the ring. */
279 I915_WRITE_START(ring, obj->gtt_offset);
280 head = I915_READ_HEAD(ring) & HEAD_ADDR;
281
282 /* G45 ring initialization fails to reset head to zero */
283 if (head != 0) {
284 DRM_DEBUG("%s head not reset to zero "
285 "ctl %08x head %08x tail %08x start %08x\n",
286 ring->name,
287 I915_READ_CTL(ring),
288 I915_READ_HEAD(ring),
289 I915_READ_TAIL(ring),
290 I915_READ_START(ring));
291
292 I915_WRITE_HEAD(ring, 0);
293
294 if (I915_READ_HEAD(ring) & HEAD_ADDR) {
295 DRM_ERROR("failed to set %s head to zero "
296 "ctl %08x head %08x tail %08x start %08x\n",
297 ring->name,
298 I915_READ_CTL(ring),
299 I915_READ_HEAD(ring),
300 I915_READ_TAIL(ring),
301 I915_READ_START(ring));
302 }
303 }
304
305 I915_WRITE_CTL(ring,
306 ((ring->size - PAGE_SIZE) & RING_NR_PAGES)
307 | RING_VALID);
308
309 /* If the head is still not zero, the ring is dead */
310 if (_intel_wait_for(ring->dev,
311 (I915_READ_CTL(ring) & RING_VALID) != 0 &&
312 I915_READ_START(ring) == obj->gtt_offset &&
313 (I915_READ_HEAD(ring) & HEAD_ADDR) == 0,
314 50, 1, "915rii")) {
315 DRM_ERROR("%s initialization failed "
316 "ctl %08x head %08x tail %08x start %08x\n",
317 ring->name,
318 I915_READ_CTL(ring),
319 I915_READ_HEAD(ring),
320 I915_READ_TAIL(ring),
321 I915_READ_START(ring));
322 return -EIO;
323 }
324
325 if (!drm_core_check_feature(ring->dev, DRIVER_MODESET))
326 i915_kernel_lost_context(ring->dev);
327 else {
328 ring->head = I915_READ_HEAD(ring);
329 ring->tail = I915_READ_TAIL(ring) & TAIL_ADDR;
330 ring->space = ring_space(ring);
331 }
332
333 return 0;
334 }
335
336 static int
init_pipe_control(struct intel_ring_buffer * ring)337 init_pipe_control(struct intel_ring_buffer *ring)
338 {
339 struct pipe_control *pc;
340 struct drm_i915_gem_object *obj;
341 int ret;
342
343 if (ring->private)
344 return 0;
345
346 pc = malloc(sizeof(*pc), DRM_I915_GEM, M_WAITOK);
347 if (!pc)
348 return -ENOMEM;
349
350 obj = i915_gem_alloc_object(ring->dev, 4096);
351 if (obj == NULL) {
352 DRM_ERROR("Failed to allocate seqno page\n");
353 ret = -ENOMEM;
354 goto err;
355 }
356
357 i915_gem_object_set_cache_level(obj, I915_CACHE_LLC);
358
359 ret = i915_gem_object_pin(obj, 4096, true);
360 if (ret)
361 goto err_unref;
362
363 pc->gtt_offset = obj->gtt_offset;
364 pc->cpu_page = (uint32_t *)kmem_alloc_nofault(kernel_map, PAGE_SIZE);
365 if (pc->cpu_page == NULL)
366 goto err_unpin;
367 pmap_qenter((uintptr_t)pc->cpu_page, &obj->pages[0], 1);
368 pmap_invalidate_cache_range((vm_offset_t)pc->cpu_page,
369 (vm_offset_t)pc->cpu_page + PAGE_SIZE);
370
371 pc->obj = obj;
372 ring->private = pc;
373 return 0;
374
375 err_unpin:
376 i915_gem_object_unpin(obj);
377 err_unref:
378 drm_gem_object_unreference(&obj->base);
379 err:
380 free(pc, DRM_I915_GEM);
381 return ret;
382 }
383
384 static void
cleanup_pipe_control(struct intel_ring_buffer * ring)385 cleanup_pipe_control(struct intel_ring_buffer *ring)
386 {
387 struct pipe_control *pc = ring->private;
388 struct drm_i915_gem_object *obj;
389
390 if (!ring->private)
391 return;
392
393 obj = pc->obj;
394 pmap_qremove((vm_offset_t)pc->cpu_page, 1);
395 kmem_free(kernel_map, (uintptr_t)pc->cpu_page, PAGE_SIZE);
396 i915_gem_object_unpin(obj);
397 drm_gem_object_unreference(&obj->base);
398
399 free(pc, DRM_I915_GEM);
400 ring->private = NULL;
401 }
402
init_render_ring(struct intel_ring_buffer * ring)403 static int init_render_ring(struct intel_ring_buffer *ring)
404 {
405 struct drm_device *dev = ring->dev;
406 struct drm_i915_private *dev_priv = dev->dev_private;
407 int ret = init_ring_common(ring);
408
409 if (INTEL_INFO(dev)->gen > 3) {
410 int mode = VS_TIMER_DISPATCH << 16 | VS_TIMER_DISPATCH;
411 I915_WRITE(MI_MODE, mode);
412 if (IS_GEN7(dev))
413 I915_WRITE(GFX_MODE_GEN7,
414 GFX_MODE_DISABLE(GFX_TLB_INVALIDATE_ALWAYS) |
415 GFX_MODE_ENABLE(GFX_REPLAY_MODE));
416 }
417
418 if (INTEL_INFO(dev)->gen >= 5) {
419 ret = init_pipe_control(ring);
420 if (ret)
421 return ret;
422 }
423
424
425 if (IS_GEN6(dev)) {
426 /* From the Sandybridge PRM, volume 1 part 3, page 24:
427 * "If this bit is set, STCunit will have LRA as replacement
428 * policy. [...] This bit must be reset. LRA replacement
429 * policy is not supported."
430 */
431 I915_WRITE(CACHE_MODE_0,
432 CM0_STC_EVICT_DISABLE_LRA_SNB << CM0_MASK_SHIFT);
433 }
434
435 if (INTEL_INFO(dev)->gen >= 6) {
436 I915_WRITE(INSTPM,
437 INSTPM_FORCE_ORDERING << 16 | INSTPM_FORCE_ORDERING);
438 }
439
440 return ret;
441 }
442
render_ring_cleanup(struct intel_ring_buffer * ring)443 static void render_ring_cleanup(struct intel_ring_buffer *ring)
444 {
445 if (!ring->private)
446 return;
447
448 cleanup_pipe_control(ring);
449 }
450
451 static void
update_mboxes(struct intel_ring_buffer * ring,u32 seqno,u32 mmio_offset)452 update_mboxes(struct intel_ring_buffer *ring,
453 u32 seqno,
454 u32 mmio_offset)
455 {
456 intel_ring_emit(ring, MI_SEMAPHORE_MBOX |
457 MI_SEMAPHORE_GLOBAL_GTT |
458 MI_SEMAPHORE_REGISTER |
459 MI_SEMAPHORE_UPDATE);
460 intel_ring_emit(ring, seqno);
461 intel_ring_emit(ring, mmio_offset);
462 }
463
464 /**
465 * gen6_add_request - Update the semaphore mailbox registers
466 *
467 * @ring - ring that is adding a request
468 * @seqno - return seqno stuck into the ring
469 *
470 * Update the mailbox registers in the *other* rings with the current seqno.
471 * This acts like a signal in the canonical semaphore.
472 */
473 static int
gen6_add_request(struct intel_ring_buffer * ring,u32 * seqno)474 gen6_add_request(struct intel_ring_buffer *ring,
475 u32 *seqno)
476 {
477 u32 mbox1_reg;
478 u32 mbox2_reg;
479 int ret;
480
481 ret = intel_ring_begin(ring, 10);
482 if (ret)
483 return ret;
484
485 mbox1_reg = ring->signal_mbox[0];
486 mbox2_reg = ring->signal_mbox[1];
487
488 *seqno = i915_gem_next_request_seqno(ring);
489
490 update_mboxes(ring, *seqno, mbox1_reg);
491 update_mboxes(ring, *seqno, mbox2_reg);
492 intel_ring_emit(ring, MI_STORE_DWORD_INDEX);
493 intel_ring_emit(ring, I915_GEM_HWS_INDEX << MI_STORE_DWORD_INDEX_SHIFT);
494 intel_ring_emit(ring, *seqno);
495 intel_ring_emit(ring, MI_USER_INTERRUPT);
496 intel_ring_advance(ring);
497
498 return 0;
499 }
500
501 /**
502 * intel_ring_sync - sync the waiter to the signaller on seqno
503 *
504 * @waiter - ring that is waiting
505 * @signaller - ring which has, or will signal
506 * @seqno - seqno which the waiter will block on
507 */
508 static int
intel_ring_sync(struct intel_ring_buffer * waiter,struct intel_ring_buffer * signaller,int ring,u32 seqno)509 intel_ring_sync(struct intel_ring_buffer *waiter,
510 struct intel_ring_buffer *signaller,
511 int ring,
512 u32 seqno)
513 {
514 int ret;
515 u32 dw1 = MI_SEMAPHORE_MBOX |
516 MI_SEMAPHORE_COMPARE |
517 MI_SEMAPHORE_REGISTER;
518
519 ret = intel_ring_begin(waiter, 4);
520 if (ret)
521 return ret;
522
523 intel_ring_emit(waiter, dw1 | signaller->semaphore_register[ring]);
524 intel_ring_emit(waiter, seqno);
525 intel_ring_emit(waiter, 0);
526 intel_ring_emit(waiter, MI_NOOP);
527 intel_ring_advance(waiter);
528
529 return 0;
530 }
531
532 int render_ring_sync_to(struct intel_ring_buffer *waiter,
533 struct intel_ring_buffer *signaller, u32 seqno);
534 int gen6_bsd_ring_sync_to(struct intel_ring_buffer *waiter,
535 struct intel_ring_buffer *signaller, u32 seqno);
536 int gen6_blt_ring_sync_to(struct intel_ring_buffer *waiter,
537 struct intel_ring_buffer *signaller, u32 seqno);
538
539 /* VCS->RCS (RVSYNC) or BCS->RCS (RBSYNC) */
540 int
render_ring_sync_to(struct intel_ring_buffer * waiter,struct intel_ring_buffer * signaller,u32 seqno)541 render_ring_sync_to(struct intel_ring_buffer *waiter,
542 struct intel_ring_buffer *signaller,
543 u32 seqno)
544 {
545 KASSERT(signaller->semaphore_register[RCS] != MI_SEMAPHORE_SYNC_INVALID,
546 ("valid RCS semaphore"));
547 return intel_ring_sync(waiter,
548 signaller,
549 RCS,
550 seqno);
551 }
552
553 /* RCS->VCS (VRSYNC) or BCS->VCS (VBSYNC) */
554 int
gen6_bsd_ring_sync_to(struct intel_ring_buffer * waiter,struct intel_ring_buffer * signaller,u32 seqno)555 gen6_bsd_ring_sync_to(struct intel_ring_buffer *waiter,
556 struct intel_ring_buffer *signaller,
557 u32 seqno)
558 {
559 KASSERT(signaller->semaphore_register[VCS] != MI_SEMAPHORE_SYNC_INVALID,
560 ("Valid VCS semaphore"));
561 return intel_ring_sync(waiter,
562 signaller,
563 VCS,
564 seqno);
565 }
566
567 /* RCS->BCS (BRSYNC) or VCS->BCS (BVSYNC) */
568 int
gen6_blt_ring_sync_to(struct intel_ring_buffer * waiter,struct intel_ring_buffer * signaller,u32 seqno)569 gen6_blt_ring_sync_to(struct intel_ring_buffer *waiter,
570 struct intel_ring_buffer *signaller,
571 u32 seqno)
572 {
573 KASSERT(signaller->semaphore_register[BCS] != MI_SEMAPHORE_SYNC_INVALID,
574 ("Valid BCS semaphore"));
575 return intel_ring_sync(waiter,
576 signaller,
577 BCS,
578 seqno);
579 }
580
/*
 * Emit a four-dword PIPE_CONTROL (qword write + depth stall) targeting
 * the given global-GTT address.  The caller reserves the ring space.
 */
#define PIPE_CONTROL_FLUSH(ring_, addr_)				\
do {									\
	intel_ring_emit(ring_, GFX_OP_PIPE_CONTROL(4) |			\
	    PIPE_CONTROL_QW_WRITE | PIPE_CONTROL_DEPTH_STALL);		\
	intel_ring_emit(ring_, (addr_) | PIPE_CONTROL_GLOBAL_GTT);	\
	intel_ring_emit(ring_, 0);					\
	intel_ring_emit(ring_, 0);					\
} while (0)
589
590 static int
pc_render_add_request(struct intel_ring_buffer * ring,uint32_t * result)591 pc_render_add_request(struct intel_ring_buffer *ring,
592 uint32_t *result)
593 {
594 u32 seqno = i915_gem_next_request_seqno(ring);
595 struct pipe_control *pc = ring->private;
596 u32 scratch_addr = pc->gtt_offset + 128;
597 int ret;
598
599 /* For Ironlake, MI_USER_INTERRUPT was deprecated and apparently
600 * incoherent with writes to memory, i.e. completely fubar,
601 * so we need to use PIPE_NOTIFY instead.
602 *
603 * However, we also need to workaround the qword write
604 * incoherence by flushing the 6 PIPE_NOTIFY buffers out to
605 * memory before requesting an interrupt.
606 */
607 ret = intel_ring_begin(ring, 32);
608 if (ret)
609 return ret;
610
611 intel_ring_emit(ring, GFX_OP_PIPE_CONTROL(4) | PIPE_CONTROL_QW_WRITE |
612 PIPE_CONTROL_WRITE_FLUSH |
613 PIPE_CONTROL_TEXTURE_CACHE_INVALIDATE);
614 intel_ring_emit(ring, pc->gtt_offset | PIPE_CONTROL_GLOBAL_GTT);
615 intel_ring_emit(ring, seqno);
616 intel_ring_emit(ring, 0);
617 PIPE_CONTROL_FLUSH(ring, scratch_addr);
618 scratch_addr += 128; /* write to separate cachelines */
619 PIPE_CONTROL_FLUSH(ring, scratch_addr);
620 scratch_addr += 128;
621 PIPE_CONTROL_FLUSH(ring, scratch_addr);
622 scratch_addr += 128;
623 PIPE_CONTROL_FLUSH(ring, scratch_addr);
624 scratch_addr += 128;
625 PIPE_CONTROL_FLUSH(ring, scratch_addr);
626 scratch_addr += 128;
627 PIPE_CONTROL_FLUSH(ring, scratch_addr);
628 intel_ring_emit(ring, GFX_OP_PIPE_CONTROL(4) | PIPE_CONTROL_QW_WRITE |
629 PIPE_CONTROL_WRITE_FLUSH |
630 PIPE_CONTROL_TEXTURE_CACHE_INVALIDATE |
631 PIPE_CONTROL_NOTIFY);
632 intel_ring_emit(ring, pc->gtt_offset | PIPE_CONTROL_GLOBAL_GTT);
633 intel_ring_emit(ring, seqno);
634 intel_ring_emit(ring, 0);
635 intel_ring_advance(ring);
636
637 *result = seqno;
638 return 0;
639 }
640
641 static int
render_ring_add_request(struct intel_ring_buffer * ring,uint32_t * result)642 render_ring_add_request(struct intel_ring_buffer *ring,
643 uint32_t *result)
644 {
645 u32 seqno = i915_gem_next_request_seqno(ring);
646 int ret;
647
648 ret = intel_ring_begin(ring, 4);
649 if (ret)
650 return ret;
651
652 intel_ring_emit(ring, MI_STORE_DWORD_INDEX);
653 intel_ring_emit(ring, I915_GEM_HWS_INDEX << MI_STORE_DWORD_INDEX_SHIFT);
654 intel_ring_emit(ring, seqno);
655 intel_ring_emit(ring, MI_USER_INTERRUPT);
656 intel_ring_advance(ring);
657
658 *result = seqno;
659 return 0;
660 }
661
662 static u32
gen6_ring_get_seqno(struct intel_ring_buffer * ring)663 gen6_ring_get_seqno(struct intel_ring_buffer *ring)
664 {
665 struct drm_device *dev = ring->dev;
666
667 /* Workaround to force correct ordering between irq and seqno writes on
668 * ivb (and maybe also on snb) by reading from a CS register (like
669 * ACTHD) before reading the status page. */
670 if (/* IS_GEN6(dev) || */IS_GEN7(dev))
671 intel_ring_get_active_head(ring);
672 return intel_read_status_page(ring, I915_GEM_HWS_INDEX);
673 }
674
675 static uint32_t
ring_get_seqno(struct intel_ring_buffer * ring)676 ring_get_seqno(struct intel_ring_buffer *ring)
677 {
678 if (ring->status_page.page_addr == NULL)
679 return (-1);
680 return intel_read_status_page(ring, I915_GEM_HWS_INDEX);
681 }
682
683 static uint32_t
pc_render_get_seqno(struct intel_ring_buffer * ring)684 pc_render_get_seqno(struct intel_ring_buffer *ring)
685 {
686 struct pipe_control *pc = ring->private;
687 if (pc != NULL)
688 return pc->cpu_page[0];
689 else
690 return (-1);
691 }
692
693 static void
ironlake_enable_irq(drm_i915_private_t * dev_priv,uint32_t mask)694 ironlake_enable_irq(drm_i915_private_t *dev_priv, uint32_t mask)
695 {
696 dev_priv->gt_irq_mask &= ~mask;
697 I915_WRITE(GTIMR, dev_priv->gt_irq_mask);
698 POSTING_READ(GTIMR);
699 }
700
701 static void
ironlake_disable_irq(drm_i915_private_t * dev_priv,uint32_t mask)702 ironlake_disable_irq(drm_i915_private_t *dev_priv, uint32_t mask)
703 {
704 dev_priv->gt_irq_mask |= mask;
705 I915_WRITE(GTIMR, dev_priv->gt_irq_mask);
706 POSTING_READ(GTIMR);
707 }
708
709 static void
i915_enable_irq(drm_i915_private_t * dev_priv,uint32_t mask)710 i915_enable_irq(drm_i915_private_t *dev_priv, uint32_t mask)
711 {
712 dev_priv->irq_mask &= ~mask;
713 I915_WRITE(IMR, dev_priv->irq_mask);
714 POSTING_READ(IMR);
715 }
716
717 static void
i915_disable_irq(drm_i915_private_t * dev_priv,uint32_t mask)718 i915_disable_irq(drm_i915_private_t *dev_priv, uint32_t mask)
719 {
720 dev_priv->irq_mask |= mask;
721 I915_WRITE(IMR, dev_priv->irq_mask);
722 POSTING_READ(IMR);
723 }
724
725 static bool
render_ring_get_irq(struct intel_ring_buffer * ring)726 render_ring_get_irq(struct intel_ring_buffer *ring)
727 {
728 struct drm_device *dev = ring->dev;
729 drm_i915_private_t *dev_priv = dev->dev_private;
730
731 if (!dev->irq_enabled)
732 return false;
733
734 mtx_assert(&ring->irq_lock, MA_OWNED);
735 if (ring->irq_refcount++ == 0) {
736 if (HAS_PCH_SPLIT(dev))
737 ironlake_enable_irq(dev_priv,
738 GT_PIPE_NOTIFY | GT_USER_INTERRUPT);
739 else
740 i915_enable_irq(dev_priv, I915_USER_INTERRUPT);
741 }
742
743 return true;
744 }
745
746 static void
render_ring_put_irq(struct intel_ring_buffer * ring)747 render_ring_put_irq(struct intel_ring_buffer *ring)
748 {
749 struct drm_device *dev = ring->dev;
750 drm_i915_private_t *dev_priv = dev->dev_private;
751
752 mtx_assert(&ring->irq_lock, MA_OWNED);
753 if (--ring->irq_refcount == 0) {
754 if (HAS_PCH_SPLIT(dev))
755 ironlake_disable_irq(dev_priv,
756 GT_USER_INTERRUPT |
757 GT_PIPE_NOTIFY);
758 else
759 i915_disable_irq(dev_priv, I915_USER_INTERRUPT);
760 }
761 }
762
intel_ring_setup_status_page(struct intel_ring_buffer * ring)763 void intel_ring_setup_status_page(struct intel_ring_buffer *ring)
764 {
765 struct drm_device *dev = ring->dev;
766 drm_i915_private_t *dev_priv = dev->dev_private;
767 uint32_t mmio = 0;
768
769 /* The ring status page addresses are no longer next to the rest of
770 * the ring registers as of gen7.
771 */
772 if (IS_GEN7(dev)) {
773 switch (ring->id) {
774 case RCS:
775 mmio = RENDER_HWS_PGA_GEN7;
776 break;
777 case BCS:
778 mmio = BLT_HWS_PGA_GEN7;
779 break;
780 case VCS:
781 mmio = BSD_HWS_PGA_GEN7;
782 break;
783 }
784 } else if (IS_GEN6(dev)) {
785 mmio = RING_HWS_PGA_GEN6(ring->mmio_base);
786 } else {
787 mmio = RING_HWS_PGA(ring->mmio_base);
788 }
789
790 I915_WRITE(mmio, (u32)ring->status_page.gfx_addr);
791 POSTING_READ(mmio);
792 }
793
794 static int
bsd_ring_flush(struct intel_ring_buffer * ring,uint32_t invalidate_domains,uint32_t flush_domains)795 bsd_ring_flush(struct intel_ring_buffer *ring,
796 uint32_t invalidate_domains,
797 uint32_t flush_domains)
798 {
799 int ret;
800
801 ret = intel_ring_begin(ring, 2);
802 if (ret)
803 return ret;
804
805 intel_ring_emit(ring, MI_FLUSH);
806 intel_ring_emit(ring, MI_NOOP);
807 intel_ring_advance(ring);
808 return 0;
809 }
810
811 static int
ring_add_request(struct intel_ring_buffer * ring,uint32_t * result)812 ring_add_request(struct intel_ring_buffer *ring,
813 uint32_t *result)
814 {
815 uint32_t seqno;
816 int ret;
817
818 ret = intel_ring_begin(ring, 4);
819 if (ret)
820 return ret;
821
822 seqno = i915_gem_next_request_seqno(ring);
823
824 intel_ring_emit(ring, MI_STORE_DWORD_INDEX);
825 intel_ring_emit(ring, I915_GEM_HWS_INDEX << MI_STORE_DWORD_INDEX_SHIFT);
826 intel_ring_emit(ring, seqno);
827 intel_ring_emit(ring, MI_USER_INTERRUPT);
828 intel_ring_advance(ring);
829
830 *result = seqno;
831 return 0;
832 }
833
834 static bool
gen6_ring_get_irq(struct intel_ring_buffer * ring,uint32_t gflag,uint32_t rflag)835 gen6_ring_get_irq(struct intel_ring_buffer *ring, uint32_t gflag, uint32_t rflag)
836 {
837 struct drm_device *dev = ring->dev;
838 drm_i915_private_t *dev_priv = dev->dev_private;
839
840 if (!dev->irq_enabled)
841 return false;
842
843 gen6_gt_force_wake_get(dev_priv);
844
845 mtx_assert(&ring->irq_lock, MA_OWNED);
846 if (ring->irq_refcount++ == 0) {
847 ring->irq_mask &= ~rflag;
848 I915_WRITE_IMR(ring, ring->irq_mask);
849 ironlake_enable_irq(dev_priv, gflag);
850 }
851
852 return true;
853 }
854
855 static void
gen6_ring_put_irq(struct intel_ring_buffer * ring,uint32_t gflag,uint32_t rflag)856 gen6_ring_put_irq(struct intel_ring_buffer *ring, uint32_t gflag, uint32_t rflag)
857 {
858 struct drm_device *dev = ring->dev;
859 drm_i915_private_t *dev_priv = dev->dev_private;
860
861 mtx_assert(&ring->irq_lock, MA_OWNED);
862 if (--ring->irq_refcount == 0) {
863 ring->irq_mask |= rflag;
864 I915_WRITE_IMR(ring, ring->irq_mask);
865 ironlake_disable_irq(dev_priv, gflag);
866 }
867
868 gen6_gt_force_wake_put(dev_priv);
869 }
870
871 static bool
bsd_ring_get_irq(struct intel_ring_buffer * ring)872 bsd_ring_get_irq(struct intel_ring_buffer *ring)
873 {
874 struct drm_device *dev = ring->dev;
875 drm_i915_private_t *dev_priv = dev->dev_private;
876
877 if (!dev->irq_enabled)
878 return false;
879
880 mtx_assert(&ring->irq_lock, MA_OWNED);
881 if (ring->irq_refcount++ == 0) {
882 if (IS_G4X(dev))
883 i915_enable_irq(dev_priv, I915_BSD_USER_INTERRUPT);
884 else
885 ironlake_enable_irq(dev_priv, GT_BSD_USER_INTERRUPT);
886 }
887
888 return true;
889 }
890 static void
bsd_ring_put_irq(struct intel_ring_buffer * ring)891 bsd_ring_put_irq(struct intel_ring_buffer *ring)
892 {
893 struct drm_device *dev = ring->dev;
894 drm_i915_private_t *dev_priv = dev->dev_private;
895
896 mtx_assert(&ring->irq_lock, MA_OWNED);
897 if (--ring->irq_refcount == 0) {
898 if (IS_G4X(dev))
899 i915_disable_irq(dev_priv, I915_BSD_USER_INTERRUPT);
900 else
901 ironlake_disable_irq(dev_priv, GT_BSD_USER_INTERRUPT);
902 }
903 }
904
905 static int
ring_dispatch_execbuffer(struct intel_ring_buffer * ring,uint32_t offset,uint32_t length)906 ring_dispatch_execbuffer(struct intel_ring_buffer *ring, uint32_t offset,
907 uint32_t length)
908 {
909 int ret;
910
911 ret = intel_ring_begin(ring, 2);
912 if (ret)
913 return ret;
914
915 intel_ring_emit(ring,
916 MI_BATCH_BUFFER_START | (2 << 6) |
917 MI_BATCH_NON_SECURE_I965);
918 intel_ring_emit(ring, offset);
919 intel_ring_advance(ring);
920
921 return 0;
922 }
923
924 static int
render_ring_dispatch_execbuffer(struct intel_ring_buffer * ring,uint32_t offset,uint32_t len)925 render_ring_dispatch_execbuffer(struct intel_ring_buffer *ring,
926 uint32_t offset, uint32_t len)
927 {
928 struct drm_device *dev = ring->dev;
929 int ret;
930
931 if (IS_I830(dev) || IS_845G(dev)) {
932 ret = intel_ring_begin(ring, 4);
933 if (ret)
934 return ret;
935
936 intel_ring_emit(ring, MI_BATCH_BUFFER);
937 intel_ring_emit(ring, offset | MI_BATCH_NON_SECURE);
938 intel_ring_emit(ring, offset + len - 8);
939 intel_ring_emit(ring, 0);
940 } else {
941 ret = intel_ring_begin(ring, 2);
942 if (ret)
943 return ret;
944
945 if (INTEL_INFO(dev)->gen >= 4) {
946 intel_ring_emit(ring,
947 MI_BATCH_BUFFER_START | (2 << 6) |
948 MI_BATCH_NON_SECURE_I965);
949 intel_ring_emit(ring, offset);
950 } else {
951 intel_ring_emit(ring,
952 MI_BATCH_BUFFER_START | (2 << 6));
953 intel_ring_emit(ring, offset | MI_BATCH_NON_SECURE);
954 }
955 }
956 intel_ring_advance(ring);
957
958 return 0;
959 }
960
cleanup_status_page(struct intel_ring_buffer * ring)961 static void cleanup_status_page(struct intel_ring_buffer *ring)
962 {
963 drm_i915_private_t *dev_priv = ring->dev->dev_private;
964 struct drm_i915_gem_object *obj;
965
966 obj = ring->status_page.obj;
967 if (obj == NULL)
968 return;
969
970 pmap_qremove((vm_offset_t)ring->status_page.page_addr, 1);
971 kmem_free(kernel_map, (vm_offset_t)ring->status_page.page_addr,
972 PAGE_SIZE);
973 i915_gem_object_unpin(obj);
974 drm_gem_object_unreference(&obj->base);
975 ring->status_page.obj = NULL;
976
977 memset(&dev_priv->hws_map, 0, sizeof(dev_priv->hws_map));
978 }
979
init_status_page(struct intel_ring_buffer * ring)980 static int init_status_page(struct intel_ring_buffer *ring)
981 {
982 struct drm_device *dev = ring->dev;
983 drm_i915_private_t *dev_priv = dev->dev_private;
984 struct drm_i915_gem_object *obj;
985 int ret;
986
987 obj = i915_gem_alloc_object(dev, 4096);
988 if (obj == NULL) {
989 DRM_ERROR("Failed to allocate status page\n");
990 ret = -ENOMEM;
991 goto err;
992 }
993
994 i915_gem_object_set_cache_level(obj, I915_CACHE_LLC);
995
996 ret = i915_gem_object_pin(obj, 4096, true);
997 if (ret != 0) {
998 goto err_unref;
999 }
1000
1001 ring->status_page.gfx_addr = obj->gtt_offset;
1002 ring->status_page.page_addr = (void *)kmem_alloc_nofault(kernel_map,
1003 PAGE_SIZE);
1004 if (ring->status_page.page_addr == NULL) {
1005 memset(&dev_priv->hws_map, 0, sizeof(dev_priv->hws_map));
1006 goto err_unpin;
1007 }
1008 pmap_qenter((vm_offset_t)ring->status_page.page_addr, &obj->pages[0],
1009 1);
1010 pmap_invalidate_cache_range((vm_offset_t)ring->status_page.page_addr,
1011 (vm_offset_t)ring->status_page.page_addr + PAGE_SIZE);
1012 ring->status_page.obj = obj;
1013 memset(ring->status_page.page_addr, 0, PAGE_SIZE);
1014
1015 intel_ring_setup_status_page(ring);
1016 DRM_DEBUG("i915: init_status_page %s hws offset: 0x%08x\n",
1017 ring->name, ring->status_page.gfx_addr);
1018
1019 return 0;
1020
1021 err_unpin:
1022 i915_gem_object_unpin(obj);
1023 err_unref:
1024 drm_gem_object_unreference(&obj->base);
1025 err:
1026 return ret;
1027 }
1028
1029 static
intel_init_ring_buffer(struct drm_device * dev,struct intel_ring_buffer * ring)1030 int intel_init_ring_buffer(struct drm_device *dev,
1031 struct intel_ring_buffer *ring)
1032 {
1033 struct drm_i915_gem_object *obj;
1034 int ret;
1035
1036 ring->dev = dev;
1037 INIT_LIST_HEAD(&ring->active_list);
1038 INIT_LIST_HEAD(&ring->request_list);
1039 INIT_LIST_HEAD(&ring->gpu_write_list);
1040
1041 mtx_init(&ring->irq_lock, "ringb", NULL, MTX_DEF);
1042 ring->irq_mask = ~0;
1043
1044 if (I915_NEED_GFX_HWS(dev)) {
1045 ret = init_status_page(ring);
1046 if (ret)
1047 return ret;
1048 }
1049
1050 obj = i915_gem_alloc_object(dev, ring->size);
1051 if (obj == NULL) {
1052 DRM_ERROR("Failed to allocate ringbuffer\n");
1053 ret = -ENOMEM;
1054 goto err_hws;
1055 }
1056
1057 ring->obj = obj;
1058
1059 ret = i915_gem_object_pin(obj, PAGE_SIZE, true);
1060 if (ret)
1061 goto err_unref;
1062
1063 ring->map.size = ring->size;
1064 ring->map.offset = dev->agp->base + obj->gtt_offset;
1065 ring->map.type = 0;
1066 ring->map.flags = 0;
1067 ring->map.mtrr = 0;
1068
1069 drm_core_ioremap_wc(&ring->map, dev);
1070 if (ring->map.virtual == NULL) {
1071 DRM_ERROR("Failed to map ringbuffer.\n");
1072 ret = -EINVAL;
1073 goto err_unpin;
1074 }
1075
1076 ring->virtual_start = ring->map.virtual;
1077 ret = ring->init(ring);
1078 if (ret)
1079 goto err_unmap;
1080
1081 /* Workaround an erratum on the i830 which causes a hang if
1082 * the TAIL pointer points to within the last 2 cachelines
1083 * of the buffer.
1084 */
1085 ring->effective_size = ring->size;
1086 if (IS_I830(ring->dev) || IS_845G(ring->dev))
1087 ring->effective_size -= 128;
1088
1089 return 0;
1090
1091 err_unmap:
1092 drm_core_ioremapfree(&ring->map, dev);
1093 err_unpin:
1094 i915_gem_object_unpin(obj);
1095 err_unref:
1096 drm_gem_object_unreference(&obj->base);
1097 ring->obj = NULL;
1098 err_hws:
1099 cleanup_status_page(ring);
1100 return ret;
1101 }
1102
intel_cleanup_ring_buffer(struct intel_ring_buffer * ring)1103 void intel_cleanup_ring_buffer(struct intel_ring_buffer *ring)
1104 {
1105 struct drm_i915_private *dev_priv;
1106 int ret;
1107
1108 if (ring->obj == NULL)
1109 return;
1110
1111 /* Disable the ring buffer. The ring must be idle at this point */
1112 dev_priv = ring->dev->dev_private;
1113 ret = intel_wait_ring_idle(ring);
1114 I915_WRITE_CTL(ring, 0);
1115
1116 drm_core_ioremapfree(&ring->map, ring->dev);
1117
1118 i915_gem_object_unpin(ring->obj);
1119 drm_gem_object_unreference(&ring->obj->base);
1120 ring->obj = NULL;
1121
1122 if (ring->cleanup)
1123 ring->cleanup(ring);
1124
1125 cleanup_status_page(ring);
1126 }
1127
intel_wrap_ring_buffer(struct intel_ring_buffer * ring)1128 static int intel_wrap_ring_buffer(struct intel_ring_buffer *ring)
1129 {
1130 unsigned int *virt;
1131 int rem = ring->size - ring->tail;
1132
1133 if (ring->space < rem) {
1134 int ret = intel_wait_ring_buffer(ring, rem);
1135 if (ret)
1136 return ret;
1137 }
1138
1139 virt = (unsigned int *)((char *)ring->virtual_start + ring->tail);
1140 rem /= 8;
1141 while (rem--) {
1142 *virt++ = MI_NOOP;
1143 *virt++ = MI_NOOP;
1144 }
1145
1146 ring->tail = 0;
1147 ring->space = ring_space(ring);
1148
1149 return 0;
1150 }
1151
/*
 * Wait for the given seqno on the ring with interruptible waits
 * temporarily disabled; the previous mm.interruptible setting is restored
 * before returning.  Returns the result of i915_wait_request().
 */
static int
intel_ring_wait_seqno(struct intel_ring_buffer *ring, u32 seqno)
{
	struct drm_i915_private *dev_priv = ring->dev->dev_private;
	bool saved_interruptible;
	int ret;

	/*
	 * XXX Not every path through intel_ring_begin has been audited for
	 * ERESTARTSYS yet, so do not let a signal interrupt this wait.
	 */
	saved_interruptible = dev_priv->mm.interruptible;
	dev_priv->mm.interruptible = false;

	ret = i915_wait_request(ring, seqno, true);

	dev_priv->mm.interruptible = saved_interruptible;
	return ret;
}
1171
intel_ring_wait_request(struct intel_ring_buffer * ring,int n)1172 static int intel_ring_wait_request(struct intel_ring_buffer *ring, int n)
1173 {
1174 struct drm_i915_gem_request *request;
1175 u32 seqno = 0;
1176 int ret;
1177
1178 i915_gem_retire_requests_ring(ring);
1179
1180 if (ring->last_retired_head != -1) {
1181 ring->head = ring->last_retired_head;
1182 ring->last_retired_head = -1;
1183 ring->space = ring_space(ring);
1184 if (ring->space >= n)
1185 return 0;
1186 }
1187
1188 list_for_each_entry(request, &ring->request_list, list) {
1189 int space;
1190
1191 if (request->tail == -1)
1192 continue;
1193
1194 space = request->tail - (ring->tail + 8);
1195 if (space < 0)
1196 space += ring->size;
1197 if (space >= n) {
1198 seqno = request->seqno;
1199 break;
1200 }
1201
1202 /* Consume this request in case we need more space than
1203 * is available and so need to prevent a race between
1204 * updating last_retired_head and direct reads of
1205 * I915_RING_HEAD. It also provides a nice sanity check.
1206 */
1207 request->tail = -1;
1208 }
1209
1210 if (seqno == 0)
1211 return -ENOSPC;
1212
1213 ret = intel_ring_wait_seqno(ring, seqno);
1214 if (ret)
1215 return ret;
1216
1217 if (ring->last_retired_head == -1)
1218 return -ENOSPC;
1219
1220 ring->head = ring->last_retired_head;
1221 ring->last_retired_head = -1;
1222 ring->space = ring_space(ring);
1223 if (ring->space < n)
1224 return -ENOSPC;
1225
1226 return 0;
1227 }
1228
intel_wait_ring_buffer(struct intel_ring_buffer * ring,int n)1229 int intel_wait_ring_buffer(struct intel_ring_buffer *ring, int n)
1230 {
1231 struct drm_device *dev = ring->dev;
1232 struct drm_i915_private *dev_priv = dev->dev_private;
1233 int end;
1234 int ret;
1235
1236 ret = intel_ring_wait_request(ring, n);
1237 if (ret != -ENOSPC)
1238 return ret;
1239
1240 CTR1(KTR_DRM, "ring_wait_begin %s", ring->name);
1241 if (drm_core_check_feature(dev, DRIVER_GEM))
1242 /* With GEM the hangcheck timer should kick us out of the loop,
1243 * leaving it early runs the risk of corrupting GEM state (due
1244 * to running on almost untested codepaths). But on resume
1245 * timers don't work yet, so prevent a complete hang in that
1246 * case by choosing an insanely large timeout. */
1247 end = ticks + hz * 60;
1248 else
1249 end = ticks + hz * 3;
1250 do {
1251 ring->head = I915_READ_HEAD(ring);
1252 ring->space = ring_space(ring);
1253 if (ring->space >= n) {
1254 CTR1(KTR_DRM, "ring_wait_end %s", ring->name);
1255 return 0;
1256 }
1257
1258 #if 0
1259 if (dev->primary->master) {
1260 struct drm_i915_master_private *master_priv = dev->primary->master->driver_priv;
1261 if (master_priv->sarea_priv)
1262 master_priv->sarea_priv->perf_boxes |= I915_BOX_WAIT;
1263 }
1264 #else
1265 if (dev_priv->sarea_priv)
1266 dev_priv->sarea_priv->perf_boxes |= I915_BOX_WAIT;
1267 #endif
1268
1269 pause("915rng", 1);
1270 if (atomic_load_acq_32(&dev_priv->mm.wedged) != 0) {
1271 CTR1(KTR_DRM, "ring_wait_end %s wedged", ring->name);
1272 return -EAGAIN;
1273 }
1274 } while (!time_after(ticks, end));
1275 CTR1(KTR_DRM, "ring_wait_end %s busy", ring->name);
1276 return -EBUSY;
1277 }
1278
intel_ring_begin(struct intel_ring_buffer * ring,int num_dwords)1279 int intel_ring_begin(struct intel_ring_buffer *ring,
1280 int num_dwords)
1281 {
1282 struct drm_i915_private *dev_priv = ring->dev->dev_private;
1283 int n = 4*num_dwords;
1284 int ret;
1285
1286 if (atomic_load_acq_int(&dev_priv->mm.wedged))
1287 return -EIO;
1288
1289 if (ring->tail + n > ring->effective_size) {
1290 ret = intel_wrap_ring_buffer(ring);
1291 if (ret != 0)
1292 return ret;
1293 }
1294
1295 if (ring->space < n) {
1296 ret = intel_wait_ring_buffer(ring, n);
1297 if (ret != 0)
1298 return ret;
1299 }
1300
1301 ring->space -= n;
1302 return 0;
1303 }
1304
intel_ring_advance(struct intel_ring_buffer * ring)1305 void intel_ring_advance(struct intel_ring_buffer *ring)
1306 {
1307 ring->tail &= ring->size - 1;
1308 ring->write_tail(ring, ring->tail);
1309 }
1310
1311 static const struct intel_ring_buffer render_ring = {
1312 .name = "render ring",
1313 .id = RCS,
1314 .mmio_base = RENDER_RING_BASE,
1315 .size = 32 * PAGE_SIZE,
1316 .init = init_render_ring,
1317 .write_tail = ring_write_tail,
1318 .flush = render_ring_flush,
1319 .add_request = render_ring_add_request,
1320 .get_seqno = ring_get_seqno,
1321 .irq_get = render_ring_get_irq,
1322 .irq_put = render_ring_put_irq,
1323 .dispatch_execbuffer = render_ring_dispatch_execbuffer,
1324 .cleanup = render_ring_cleanup,
1325 .sync_to = render_ring_sync_to,
1326 .semaphore_register = {MI_SEMAPHORE_SYNC_INVALID,
1327 MI_SEMAPHORE_SYNC_RV,
1328 MI_SEMAPHORE_SYNC_RB},
1329 .signal_mbox = {GEN6_VRSYNC, GEN6_BRSYNC},
1330 };
1331
1332 /* ring buffer for bit-stream decoder */
1333
1334 static const struct intel_ring_buffer bsd_ring = {
1335 .name = "bsd ring",
1336 .id = VCS,
1337 .mmio_base = BSD_RING_BASE,
1338 .size = 32 * PAGE_SIZE,
1339 .init = init_ring_common,
1340 .write_tail = ring_write_tail,
1341 .flush = bsd_ring_flush,
1342 .add_request = ring_add_request,
1343 .get_seqno = ring_get_seqno,
1344 .irq_get = bsd_ring_get_irq,
1345 .irq_put = bsd_ring_put_irq,
1346 .dispatch_execbuffer = ring_dispatch_execbuffer,
1347 };
1348
1349
/*
 * write_tail hook for the GEN6 BSD ring.
 *
 * The tail register write is bracketed by a fixed register sequence: the
 * RC ILDL message is disabled, GEN6_BSD_RNCID is cleared, the idle
 * indicator is waited on (an error is logged on timeout, and the sequence
 * continues anyway), the tail is written, and the message is re-enabled.
 */
static void
gen6_bsd_ring_write_tail(struct intel_ring_buffer *ring, uint32_t value)
{
	drm_i915_private_t *dev_priv = ring->dev->dev_private;
	int wait_ret;

	/* Every tail move must follow the sequence below */
	I915_WRITE(GEN6_BSD_SLEEP_PSMI_CONTROL,
	    GEN6_BSD_SLEEP_PSMI_CONTROL_RC_ILDL_MESSAGE_MODIFY_MASK |
	    GEN6_BSD_SLEEP_PSMI_CONTROL_RC_ILDL_MESSAGE_DISABLE);
	I915_WRITE(GEN6_BSD_RNCID, 0x0);

	wait_ret = _intel_wait_for(ring->dev,
	    (I915_READ(GEN6_BSD_SLEEP_PSMI_CONTROL) &
	    GEN6_BSD_SLEEP_PSMI_CONTROL_IDLE_INDICATOR) == 0, 50,
	    true, "915g6i");
	if (wait_ret != 0)
		DRM_ERROR("timed out waiting for IDLE Indicator\n");

	I915_WRITE_TAIL(ring, value);
	I915_WRITE(GEN6_BSD_SLEEP_PSMI_CONTROL,
	    GEN6_BSD_SLEEP_PSMI_CONTROL_RC_ILDL_MESSAGE_MODIFY_MASK |
	    GEN6_BSD_SLEEP_PSMI_CONTROL_RC_ILDL_MESSAGE_ENABLE);
}
1372
/*
 * flush hook: emit a four-dword MI_FLUSH_DW packet.
 *
 * When any bit of I915_GEM_GPU_DOMAINS is set in invalidate, the TLB and
 * BSD invalidate bits are added to the command.  The flush argument is
 * not used.  Returns 0 or the error from intel_ring_begin().
 */
static int
gen6_ring_flush(struct intel_ring_buffer *ring, uint32_t invalidate,
    uint32_t flush)
{
	uint32_t cmd = MI_FLUSH_DW;
	int ret;

	if ((invalidate & I915_GEM_GPU_DOMAINS) != 0)
		cmd |= MI_INVALIDATE_TLB | MI_INVALIDATE_BSD;

	ret = intel_ring_begin(ring, 4);
	if (ret != 0)
		return ret;

	intel_ring_emit(ring, cmd);
	intel_ring_emit(ring, 0);
	intel_ring_emit(ring, 0);
	intel_ring_emit(ring, MI_NOOP);
	intel_ring_advance(ring);

	return 0;
}
1393
1394 static int
gen6_ring_dispatch_execbuffer(struct intel_ring_buffer * ring,uint32_t offset,uint32_t len)1395 gen6_ring_dispatch_execbuffer(struct intel_ring_buffer *ring,
1396 uint32_t offset, uint32_t len)
1397 {
1398 int ret;
1399
1400 ret = intel_ring_begin(ring, 2);
1401 if (ret)
1402 return ret;
1403
1404 intel_ring_emit(ring, MI_BATCH_BUFFER_START | MI_BATCH_NON_SECURE_I965);
1405 /* bit0-7 is the length on GEN6+ */
1406 intel_ring_emit(ring, offset);
1407 intel_ring_advance(ring);
1408
1409 return 0;
1410 }
1411
1412 static bool
gen6_render_ring_get_irq(struct intel_ring_buffer * ring)1413 gen6_render_ring_get_irq(struct intel_ring_buffer *ring)
1414 {
1415 return gen6_ring_get_irq(ring,
1416 GT_USER_INTERRUPT,
1417 GEN6_RENDER_USER_INTERRUPT);
1418 }
1419
1420 static void
gen6_render_ring_put_irq(struct intel_ring_buffer * ring)1421 gen6_render_ring_put_irq(struct intel_ring_buffer *ring)
1422 {
1423 return gen6_ring_put_irq(ring,
1424 GT_USER_INTERRUPT,
1425 GEN6_RENDER_USER_INTERRUPT);
1426 }
1427
1428 static bool
gen6_bsd_ring_get_irq(struct intel_ring_buffer * ring)1429 gen6_bsd_ring_get_irq(struct intel_ring_buffer *ring)
1430 {
1431 return gen6_ring_get_irq(ring,
1432 GT_GEN6_BSD_USER_INTERRUPT,
1433 GEN6_BSD_USER_INTERRUPT);
1434 }
1435
1436 static void
gen6_bsd_ring_put_irq(struct intel_ring_buffer * ring)1437 gen6_bsd_ring_put_irq(struct intel_ring_buffer *ring)
1438 {
1439 return gen6_ring_put_irq(ring,
1440 GT_GEN6_BSD_USER_INTERRUPT,
1441 GEN6_BSD_USER_INTERRUPT);
1442 }
1443
1444 /* ring buffer for Video Codec for Gen6+ */
1445 static const struct intel_ring_buffer gen6_bsd_ring = {
1446 .name = "gen6 bsd ring",
1447 .id = VCS,
1448 .mmio_base = GEN6_BSD_RING_BASE,
1449 .size = 32 * PAGE_SIZE,
1450 .init = init_ring_common,
1451 .write_tail = gen6_bsd_ring_write_tail,
1452 .flush = gen6_ring_flush,
1453 .add_request = gen6_add_request,
1454 .get_seqno = gen6_ring_get_seqno,
1455 .irq_get = gen6_bsd_ring_get_irq,
1456 .irq_put = gen6_bsd_ring_put_irq,
1457 .dispatch_execbuffer = gen6_ring_dispatch_execbuffer,
1458 .sync_to = gen6_bsd_ring_sync_to,
1459 .semaphore_register = {MI_SEMAPHORE_SYNC_VR,
1460 MI_SEMAPHORE_SYNC_INVALID,
1461 MI_SEMAPHORE_SYNC_VB},
1462 .signal_mbox = {GEN6_RVSYNC, GEN6_BVSYNC},
1463 };
1464
1465 /* Blitter support (SandyBridge+) */
1466
1467 static bool
blt_ring_get_irq(struct intel_ring_buffer * ring)1468 blt_ring_get_irq(struct intel_ring_buffer *ring)
1469 {
1470 return gen6_ring_get_irq(ring,
1471 GT_BLT_USER_INTERRUPT,
1472 GEN6_BLITTER_USER_INTERRUPT);
1473 }
1474
1475 static void
blt_ring_put_irq(struct intel_ring_buffer * ring)1476 blt_ring_put_irq(struct intel_ring_buffer *ring)
1477 {
1478 gen6_ring_put_irq(ring,
1479 GT_BLT_USER_INTERRUPT,
1480 GEN6_BLITTER_USER_INTERRUPT);
1481 }
1482
/*
 * flush hook for the blitter ring: emit a four-dword MI_FLUSH_DW packet.
 *
 * MI_INVALIDATE_TLB is added when I915_GEM_DOMAIN_RENDER is set in
 * invalidate.  The flush argument is not used.  Returns 0 or the error
 * from intel_ring_begin().
 */
static int
blt_ring_flush(struct intel_ring_buffer *ring, uint32_t invalidate,
    uint32_t flush)
{
	uint32_t cmd = MI_FLUSH_DW;
	int ret;

	if ((invalidate & I915_GEM_DOMAIN_RENDER) != 0)
		cmd |= MI_INVALIDATE_TLB;

	ret = intel_ring_begin(ring, 4);
	if (ret != 0)
		return ret;

	intel_ring_emit(ring, cmd);
	intel_ring_emit(ring, 0);
	intel_ring_emit(ring, 0);
	intel_ring_emit(ring, MI_NOOP);
	intel_ring_advance(ring);

	return 0;
}
1503
1504 static const struct intel_ring_buffer gen6_blt_ring = {
1505 .name = "blt ring",
1506 .id = BCS,
1507 .mmio_base = BLT_RING_BASE,
1508 .size = 32 * PAGE_SIZE,
1509 .init = init_ring_common,
1510 .write_tail = ring_write_tail,
1511 .flush = blt_ring_flush,
1512 .add_request = gen6_add_request,
1513 .get_seqno = gen6_ring_get_seqno,
1514 .irq_get = blt_ring_get_irq,
1515 .irq_put = blt_ring_put_irq,
1516 .dispatch_execbuffer = gen6_ring_dispatch_execbuffer,
1517 .sync_to = gen6_blt_ring_sync_to,
1518 .semaphore_register = {MI_SEMAPHORE_SYNC_BR,
1519 MI_SEMAPHORE_SYNC_BV,
1520 MI_SEMAPHORE_SYNC_INVALID},
1521 .signal_mbox = {GEN6_RBSYNC, GEN6_VBSYNC},
1522 };
1523
intel_init_render_ring_buffer(struct drm_device * dev)1524 int intel_init_render_ring_buffer(struct drm_device *dev)
1525 {
1526 drm_i915_private_t *dev_priv = dev->dev_private;
1527 struct intel_ring_buffer *ring = &dev_priv->rings[RCS];
1528
1529 *ring = render_ring;
1530 if (INTEL_INFO(dev)->gen >= 6) {
1531 ring->add_request = gen6_add_request;
1532 ring->flush = gen6_render_ring_flush;
1533 ring->irq_get = gen6_render_ring_get_irq;
1534 ring->irq_put = gen6_render_ring_put_irq;
1535 ring->get_seqno = gen6_ring_get_seqno;
1536 } else if (IS_GEN5(dev)) {
1537 ring->add_request = pc_render_add_request;
1538 ring->get_seqno = pc_render_get_seqno;
1539 }
1540
1541 if (!I915_NEED_GFX_HWS(dev)) {
1542 ring->status_page.page_addr = dev_priv->status_page_dmah->vaddr;
1543 memset(ring->status_page.page_addr, 0, PAGE_SIZE);
1544 }
1545
1546 return intel_init_ring_buffer(dev, ring);
1547 }
1548
/*
 * Set up the render ring over a caller-supplied address range.
 *
 * Unlike intel_init_ring_buffer(), no GEM object is allocated here: the
 * ring is described by start/size and mapped write-combined directly.
 *
 * dev:   DRM device owning the ring.
 * start: offset of the ring, stored in ring->map.offset.
 * size:  ring size in bytes.
 *
 * Returns 0 on success or -ENOMEM when the range cannot be mapped.
 */
int
intel_render_ring_init_dri(struct drm_device *dev, uint64_t start,
    uint32_t size)
{
	drm_i915_private_t *dev_priv = dev->dev_private;
	struct intel_ring_buffer *ring = &dev_priv->rings[RCS];

	*ring = render_ring;
	if (INTEL_INFO(dev)->gen >= 6) {
		ring->add_request = gen6_add_request;
		ring->irq_get = gen6_render_ring_get_irq;
		ring->irq_put = gen6_render_ring_put_irq;
	} else if (IS_GEN5(dev)) {
		ring->add_request = pc_render_add_request;
		ring->get_seqno = pc_render_get_seqno;
	}

	ring->dev = dev;
	INIT_LIST_HEAD(&ring->active_list);
	INIT_LIST_HEAD(&ring->request_list);
	INIT_LIST_HEAD(&ring->gpu_write_list);

	/*
	 * Keep the last 128 bytes out of use on the i830 and 845G, the same
	 * set of chips intel_init_ring_buffer() applies the TAIL-pointer
	 * erratum workaround to.
	 */
	ring->size = size;
	ring->effective_size = ring->size;
	if (IS_I830(ring->dev) || IS_845G(ring->dev))
		ring->effective_size -= 128;

	ring->map.offset = start;
	ring->map.size = size;
	ring->map.type = 0;
	ring->map.flags = 0;
	ring->map.mtrr = 0;

	drm_core_ioremap_wc(&ring->map, dev);
	if (ring->map.virtual == NULL) {
		DRM_ERROR("can not ioremap virtual address for"
		    " ring buffer\n");
		return -ENOMEM;
	}

	ring->virtual_start = (void *)ring->map.virtual;
	return 0;
}
1591
intel_init_bsd_ring_buffer(struct drm_device * dev)1592 int intel_init_bsd_ring_buffer(struct drm_device *dev)
1593 {
1594 drm_i915_private_t *dev_priv = dev->dev_private;
1595 struct intel_ring_buffer *ring = &dev_priv->rings[VCS];
1596
1597 if (IS_GEN6(dev) || IS_GEN7(dev))
1598 *ring = gen6_bsd_ring;
1599 else
1600 *ring = bsd_ring;
1601
1602 return intel_init_ring_buffer(dev, ring);
1603 }
1604
intel_init_blt_ring_buffer(struct drm_device * dev)1605 int intel_init_blt_ring_buffer(struct drm_device *dev)
1606 {
1607 drm_i915_private_t *dev_priv = dev->dev_private;
1608 struct intel_ring_buffer *ring = &dev_priv->rings[BCS];
1609
1610 *ring = gen6_blt_ring;
1611
1612 return intel_init_ring_buffer(dev, ring);
1613 }
1614