1 /*-
2  * Copyright 2000 VA Linux Systems, Inc., Fremont, California.
3  * All Rights Reserved.
4  *
5  * Permission is hereby granted, free of charge, to any person obtaining a
6  * copy of this software and associated documentation files (the "Software"),
7  * to deal in the Software without restriction, including without limitation
8  * the rights to use, copy, modify, merge, publish, distribute, sublicense,
9  * and/or sell copies of the Software, and to permit persons to whom the
10  * Software is furnished to do so, subject to the following conditions:
11  *
12  * The above copyright notice and this permission notice (including the next
13  * paragraph) shall be included in all copies or substantial portions of the
14  * Software.
15  *
16  * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
17  * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
18  * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT.  IN NO EVENT SHALL
19  * PRECISION INSIGHT AND/OR ITS SUPPLIERS BE LIABLE FOR ANY CLAIM, DAMAGES OR
20  * OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE,
21  * ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER
22  * DEALINGS IN THE SOFTWARE.
23  *
24  * Authors:
25  *    Gareth Hughes <gareth@valinux.com>
26  *    Kevin E. Martin <martin@valinux.com>
27  */
28 
29 #include <sys/cdefs.h>
30 __FBSDID("$FreeBSD: stable/9/sys/dev/drm/radeon_state.c 229732 2012-01-06 21:16:51Z dim $");
31 
32 #include "dev/drm/drmP.h"
33 #include "dev/drm/drm.h"
34 #include "dev/drm/drm_sarea.h"
35 #include "dev/drm/radeon_drm.h"
36 #include "dev/drm/radeon_drv.h"
37 
38 /* ================================================================
39  * Helper functions for client state checking and fixup
40  */
41 
/*
 * Validate a client-supplied offset into the card address space and, when
 * it is not directly usable, try to translate it from one of the legacy
 * conventions some clients use.
 *
 * dev_priv:  driver state; supplies fb_location, fb_size, gart_size and
 *            gart_vm_start.
 * file_priv: per-open-file state; its driver_priv carries radeon_fb_delta.
 * offset:    in/out; rewritten only when a fixup was applied and accepted.
 *
 * Returns 0 if the (possibly rewritten) offset passes radeon_check_offset(),
 * -EINVAL otherwise, in which case *offset is left unmodified.
 */
static __inline__ int radeon_check_and_fixup_offset(
	drm_radeon_private_t *dev_priv, struct drm_file *file_priv,
	u32 *offset)
{
	const u32 fb_last = dev_priv->fb_location + dev_priv->fb_size - 1;
	u64 fixed = *offset;

	/*
	 * The story of the offset: this function converts the various ideas
	 * userland clients might have of an offset in the card address space
	 * into an actual offset in the card address space.  A sane client
	 * passes a value that only needs boundary checking.  Not all clients
	 * are sane, though: some older ones pass 0-based offsets relative to
	 * the start of the framebuffer, and some may assume the AGP aperture
	 * is appended to the framebuffer.  Those cases are detected and
	 * fixed up below.
	 *
	 * Note: it might be a good idea to make sure the offset lands in
	 * some "allowed" area, to protect things like the PCIE GART.
	 */

	/* Best case: the offset already lands in the framebuffer or in the
	 * GART mapped space, so it is kept as is.
	 */
	if (radeon_check_offset(dev_priv, fixed))
		return 0;

	/* A zero-based offset that fits in framebuffer + GART space gets the
	 * magic delta applied (obtained from SETPARAM or calculated from
	 * fb_location).
	 */
	if (fixed < (dev_priv->fb_size + dev_priv->gart_size)) {
		struct drm_radeon_driver_file_fields *fields =
		    file_priv->driver_priv;

		fixed += fields->radeon_fb_delta;
	}

	/* Anything past the end of the framebuffer is assumed to have been
	 * aimed at the GART.
	 */
	if (fixed > fb_last)
		fixed = fixed - fb_last - 1 + dev_priv->gart_vm_start;

	/* Recheck the translated value; give up if it is still out of bounds */
	if (!radeon_check_offset(dev_priv, fixed))
		return -EINVAL;

	DRM_DEBUG("offset fixed up to 0x%x\n", (unsigned int)fixed);
	*offset = fixed;
	return 0;
}
92 
/*
 * Check the memory offsets embedded in one state packet, rewriting them in
 * place where radeon_check_and_fixup_offset() is able to translate them.
 *
 * id:   state packet ID (one of the RADEON_EMIT_* / R200_EMIT_* values).
 * data: the packet's register values; offset dwords may be rewritten.
 *
 * Returns 0 if the packet is acceptable, -EINVAL if one of its offsets is
 * rejected or if the packet ID is not known.
 */
static __inline__ int radeon_check_and_fixup_packets(
	drm_radeon_private_t *dev_priv, struct drm_file *file_priv,
	int id, u32 *data)
{
	u32 *slot;	/* dword of the packet currently being checked */

	switch (id) {

	case RADEON_EMIT_PP_MISC:
		/* The depth offset lives inside the run starting at PP_MISC */
		slot = &data[(RADEON_RB3D_DEPTHOFFSET - RADEON_PP_MISC) / 4];
		if (radeon_check_and_fixup_offset(dev_priv, file_priv, slot)) {
			DRM_ERROR("Invalid depth buffer offset\n");
			return -EINVAL;
		}
		break;

	case RADEON_EMIT_PP_CNTL:
		/* The colour offset lives inside the run starting at PP_CNTL */
		slot = &data[(RADEON_RB3D_COLOROFFSET - RADEON_PP_CNTL) / 4];
		if (radeon_check_and_fixup_offset(dev_priv, file_priv, slot)) {
			DRM_ERROR("Invalid colour buffer offset\n");
			return -EINVAL;
		}
		break;

	case R200_EMIT_PP_TXOFFSET_0:
	case R200_EMIT_PP_TXOFFSET_1:
	case R200_EMIT_PP_TXOFFSET_2:
	case R200_EMIT_PP_TXOFFSET_3:
	case R200_EMIT_PP_TXOFFSET_4:
	case R200_EMIT_PP_TXOFFSET_5:
		/* The first dword of these packets is the texture offset */
		slot = &data[0];
		if (radeon_check_and_fixup_offset(dev_priv, file_priv, slot)) {
			DRM_ERROR("Invalid R200 texture offset\n");
			return -EINVAL;
		}
		break;

	case RADEON_EMIT_PP_TXFILTER_0:
	case RADEON_EMIT_PP_TXFILTER_1:
	case RADEON_EMIT_PP_TXFILTER_2:
		/* The texture offset sits at a fixed distance from TXFILTER */
		slot = &data[(RADEON_PP_TXOFFSET_0 - RADEON_PP_TXFILTER_0) / 4];
		if (radeon_check_and_fixup_offset(dev_priv, file_priv, slot)) {
			DRM_ERROR("Invalid R100 texture offset\n");
			return -EINVAL;
		}
		break;

	case R200_EMIT_PP_CUBIC_OFFSETS_0:
	case R200_EMIT_PP_CUBIC_OFFSETS_1:
	case R200_EMIT_PP_CUBIC_OFFSETS_2:
	case R200_EMIT_PP_CUBIC_OFFSETS_3:
	case R200_EMIT_PP_CUBIC_OFFSETS_4:
	case R200_EMIT_PP_CUBIC_OFFSETS_5:
		/* All five dwords of the packet are checked as offsets */
		for (slot = data; slot != data + 5; slot++) {
			if (radeon_check_and_fixup_offset(dev_priv, file_priv,
							  slot)) {
				DRM_ERROR
				    ("Invalid R200 cubic texture offset\n");
				return -EINVAL;
			}
		}
		break;

	case RADEON_EMIT_PP_CUBIC_OFFSETS_T0:
	case RADEON_EMIT_PP_CUBIC_OFFSETS_T1:
	case RADEON_EMIT_PP_CUBIC_OFFSETS_T2:
		/* All five dwords of the packet are checked as offsets */
		for (slot = data; slot != data + 5; slot++) {
			if (radeon_check_and_fixup_offset(dev_priv, file_priv,
							  slot)) {
				DRM_ERROR
				    ("Invalid R100 cubic texture offset\n");
				return -EINVAL;
			}
		}
		break;

	case R200_EMIT_VAP_CTL: {
		/* No offset is checked for this packet; instead a write of 0
		 * to RADEON_SE_TCL_STATE_FLUSH is queued on the ring.
		 */
		RING_LOCALS;

		BEGIN_RING(2);
		OUT_RING_REG(RADEON_SE_TCL_STATE_FLUSH, 0);
		ADVANCE_RING();
		break;
	}

	case RADEON_EMIT_RB3D_COLORPITCH:
	case RADEON_EMIT_RE_LINE_PATTERN:
	case RADEON_EMIT_SE_LINE_WIDTH:
	case RADEON_EMIT_PP_LUM_MATRIX:
	case RADEON_EMIT_PP_ROT_MATRIX_0:
	case RADEON_EMIT_RB3D_STENCILREFMASK:
	case RADEON_EMIT_SE_VPORT_XSCALE:
	case RADEON_EMIT_SE_CNTL:
	case RADEON_EMIT_SE_CNTL_STATUS:
	case RADEON_EMIT_RE_MISC:
	case RADEON_EMIT_PP_BORDER_COLOR_0:
	case RADEON_EMIT_PP_BORDER_COLOR_1:
	case RADEON_EMIT_PP_BORDER_COLOR_2:
	case RADEON_EMIT_SE_ZBIAS_FACTOR:
	case RADEON_EMIT_SE_TCL_OUTPUT_VTX_FMT:
	case RADEON_EMIT_SE_TCL_MATERIAL_EMMISSIVE_RED:
	case R200_EMIT_PP_TXCBLEND_0:
	case R200_EMIT_PP_TXCBLEND_1:
	case R200_EMIT_PP_TXCBLEND_2:
	case R200_EMIT_PP_TXCBLEND_3:
	case R200_EMIT_PP_TXCBLEND_4:
	case R200_EMIT_PP_TXCBLEND_5:
	case R200_EMIT_PP_TXCBLEND_6:
	case R200_EMIT_PP_TXCBLEND_7:
	case R200_EMIT_TCL_LIGHT_MODEL_CTL_0:
	case R200_EMIT_TFACTOR_0:
	case R200_EMIT_VTX_FMT_0:
	case R200_EMIT_MATRIX_SELECT_0:
	case R200_EMIT_TEX_PROC_CTL_2:
	case R200_EMIT_TCL_UCP_VERT_BLEND_CTL:
	case R200_EMIT_PP_TXFILTER_0:
	case R200_EMIT_PP_TXFILTER_1:
	case R200_EMIT_PP_TXFILTER_2:
	case R200_EMIT_PP_TXFILTER_3:
	case R200_EMIT_PP_TXFILTER_4:
	case R200_EMIT_PP_TXFILTER_5:
	case R200_EMIT_VTE_CNTL:
	case R200_EMIT_OUTPUT_VTX_COMP_SEL:
	case R200_EMIT_PP_TAM_DEBUG3:
	case R200_EMIT_PP_CNTL_X:
	case R200_EMIT_RB3D_DEPTHXY_OFFSET:
	case R200_EMIT_RE_AUX_SCISSOR_CNTL:
	case R200_EMIT_RE_SCISSOR_TL_0:
	case R200_EMIT_RE_SCISSOR_TL_1:
	case R200_EMIT_RE_SCISSOR_TL_2:
	case R200_EMIT_SE_VAP_CNTL_STATUS:
	case R200_EMIT_SE_VTX_STATE_CNTL:
	case R200_EMIT_RE_POINTSIZE:
	case R200_EMIT_TCL_INPUT_VTX_VECTOR_ADDR_0:
	case R200_EMIT_PP_CUBIC_FACES_0:
	case R200_EMIT_PP_CUBIC_FACES_1:
	case R200_EMIT_PP_CUBIC_FACES_2:
	case R200_EMIT_PP_CUBIC_FACES_3:
	case R200_EMIT_PP_CUBIC_FACES_4:
	case R200_EMIT_PP_CUBIC_FACES_5:
	case RADEON_EMIT_PP_TEX_SIZE_0:
	case RADEON_EMIT_PP_TEX_SIZE_1:
	case RADEON_EMIT_PP_TEX_SIZE_2:
	case R200_EMIT_RB3D_BLENDCOLOR:
	case R200_EMIT_TCL_POINT_SPRITE_CNTL:
	case RADEON_EMIT_PP_CUBIC_FACES_0:
	case RADEON_EMIT_PP_CUBIC_FACES_1:
	case RADEON_EMIT_PP_CUBIC_FACES_2:
	case R200_EMIT_PP_TRI_PERF_CNTL:
	case R200_EMIT_PP_AFS_0:
	case R200_EMIT_PP_AFS_1:
	case R200_EMIT_ATF_TFACTOR:
	case R200_EMIT_PP_TXCTLALL_0:
	case R200_EMIT_PP_TXCTLALL_1:
	case R200_EMIT_PP_TXCTLALL_2:
	case R200_EMIT_PP_TXCTLALL_3:
	case R200_EMIT_PP_TXCTLALL_4:
	case R200_EMIT_PP_TXCTLALL_5:
	case R200_EMIT_VAP_PVS_CNTL:
		/* These packets don't contain memory offsets */
		break;

	default:
		DRM_ERROR("Unknown state packet ID %d\n", id);
		return -EINVAL;
	}

	return 0;
}
266 
/*
 * Sanity-check one client-supplied CP type-3 packet and fix up any memory
 * offsets it carries.
 *
 * cmdbuf: command buffer; buf points at the packet header and bufsz is the
 *         number of bytes available from there.
 * cmdsz:  out; set to the packet length in dwords (header included) as
 *         encoded in the header.  It is written before any validation.
 *
 * Returns 0 if the packet is acceptable.  Returns -EINVAL if it is not a
 * type-3 packet, does not fit in the buffer, is too short for the fields
 * its opcode requires, is not valid for this chip's microcode, uses an
 * opcode that is not explicitly accepted below, or carries an offset that
 * radeon_check_and_fixup_offset() rejects.
 */
static __inline__ int radeon_check_and_fixup_packet3(drm_radeon_private_t *
						     dev_priv,
						     struct drm_file *file_priv,
						     drm_radeon_kcmd_buffer_t *
						     cmdbuf,
						     unsigned int *cmdsz)
{
	u32 *cmd = (u32 *) cmdbuf->buf;
	u32 offset, narrays;
	int count, i, k;

	/* Header dword plus (count field + 1) payload dwords */
	*cmdsz = 2 + ((cmd[0] & RADEON_CP_PACKET_COUNT_MASK) >> 16);

	if ((cmd[0] & 0xc0000000) != RADEON_CP_PACKET3) {
		DRM_ERROR("Not a type 3 packet\n");
		return -EINVAL;
	}

	if (4 * *cmdsz > cmdbuf->bufsz) {
		DRM_ERROR("Packet size larger than size of data provided\n");
		return -EINVAL;
	}

	/*
	 * From here on cmd[0] .. cmd[*cmdsz - 1] are known to lie inside the
	 * buffer.  *cmdsz is at least 2, so cmd[1] is always available; any
	 * access beyond that must be checked against *cmdsz first.
	 */

	switch(cmd[0] & 0xff00) {
	/* XXX Are there old drivers needing other packets? */

	case RADEON_3D_DRAW_IMMD:
	case RADEON_3D_DRAW_VBUF:
	case RADEON_3D_DRAW_INDX:
	case RADEON_WAIT_FOR_IDLE:
	case RADEON_CP_NOP:
	case RADEON_3D_CLEAR_ZMASK:
		/* These packets are safe.  RADEON_CP_NEXT_CHAR,
		 * RADEON_CP_PLY_NEXTSCAN and RADEON_CP_SET_SCISSORS are
		 * probably safe too, but have never been needed and so are
		 * not accepted.
		 */
		break;

	case RADEON_CP_3D_DRAW_IMMD_2:
	case RADEON_CP_3D_DRAW_VBUF_2:
	case RADEON_CP_3D_DRAW_INDX_2:
	case RADEON_3D_CLEAR_HIZ:
		/* safe but r200 only */
		if (dev_priv->microcode_version != UCODE_R200) {
			DRM_ERROR("Invalid 3d packet for r100-class chip\n");
			return -EINVAL;
		}
		break;

	case RADEON_3D_LOAD_VBPNTR:
		count = (cmd[0] >> 16) & 0x3fff;

		if (count > 18) { /* 12 arrays max */
			DRM_ERROR("Too large payload in 3D_LOAD_VBPNTR (count=%d)\n",
				  count);
			return -EINVAL;
		}

		/*
		 * Carefully check packet contents.  After the array count in
		 * cmd[1], arrays come in pairs: one attribute dword followed
		 * by two offsets (or a single offset for an odd last array).
		 * The last valid index is count + 1, so every offset access
		 * is guarded against i reaching count + 2; bailing out of the
		 * loop early leaves k != narrays, which the check after the
		 * loop reports as a malformed packet.
		 */
		narrays = cmd[1] & ~0xc000;
		k = 0;
		i = 2;
		while ((k < narrays) && (i < (count + 2))) {
			i++;		/* skip attribute field */
			if (i >= (count + 2))
				break;	/* packet ends before the offset */
			if (radeon_check_and_fixup_offset(dev_priv, file_priv,
							  &cmd[i])) {
				DRM_ERROR
				    ("Invalid offset (k=%d i=%d) in 3D_LOAD_VBPNTR packet.\n",
				     k, i);
				return -EINVAL;
			}
			k++;
			i++;
			if (k == narrays)
				break;
			/* have one more to process, they come in pairs */
			if (i >= (count + 2))
				break;	/* packet ends before 2nd offset */
			if (radeon_check_and_fixup_offset(dev_priv,
							  file_priv, &cmd[i]))
			{
				DRM_ERROR
				    ("Invalid offset (k=%d i=%d) in 3D_LOAD_VBPNTR packet.\n",
				     k, i);
				return -EINVAL;
			}
			k++;
			i++;
		}
		/* do the counts match what we expect ? */
		if ((k != narrays) || (i != (count + 2))) {
			DRM_ERROR
			    ("Malformed 3D_LOAD_VBPNTR packet (k=%d i=%d narrays=%d count+1=%d).\n",
			      k, i, narrays, count + 1);
			return -EINVAL;
		}
		break;

	case RADEON_3D_RNDR_GEN_INDX_PRIM:
		if (dev_priv->microcode_version != UCODE_R100) {
			DRM_ERROR("Invalid 3d packet for r200-class chip\n");
			return -EINVAL;
		}
		if (radeon_check_and_fixup_offset(dev_priv, file_priv, &cmd[1])) {
				DRM_ERROR("Invalid rndr_gen_indx offset\n");
				return -EINVAL;
		}
		break;

	case RADEON_CP_INDX_BUFFER:
		if (dev_priv->microcode_version != UCODE_R200) {
			DRM_ERROR("Invalid 3d packet for r100-class chip\n");
			return -EINVAL;
		}
		if ((cmd[1] & 0x8000ffff) != 0x80000810) {
			DRM_ERROR("Invalid indx_buffer reg address %08X\n", cmd[1]);
			return -EINVAL;
		}
		/* The offset is in cmd[2]; make sure the packet has it */
		if (*cmdsz < 3) {
			DRM_ERROR("Truncated indx_buffer packet\n");
			return -EINVAL;
		}
		if (radeon_check_and_fixup_offset(dev_priv, file_priv, &cmd[2])) {
			DRM_ERROR("Invalid indx_buffer offset is %08X\n", cmd[2]);
			return -EINVAL;
		}
		break;

	case RADEON_CNTL_HOSTDATA_BLT:
	case RADEON_CNTL_PAINT_MULTI:
	case RADEON_CNTL_BITBLT_MULTI:
		/* MSB of opcode: next DWORD GUI_CNTL */
		if (cmd[1] & (RADEON_GMC_SRC_PITCH_OFFSET_CNTL
			      | RADEON_GMC_DST_PITCH_OFFSET_CNTL)) {
			/* At least one pitch/offset dword follows, in cmd[2] */
			if (*cmdsz < 3) {
				DRM_ERROR("Truncated packet: first offset missing\n");
				return -EINVAL;
			}
			/* The low 22 bits hold the offset in 1KB units */
			offset = cmd[2] << 10;
			if (radeon_check_and_fixup_offset
			    (dev_priv, file_priv, &offset)) {
				DRM_ERROR("Invalid first packet offset\n");
				return -EINVAL;
			}
			cmd[2] = (cmd[2] & 0xffc00000) | (offset >> 10);
		}

		if ((cmd[1] & RADEON_GMC_SRC_PITCH_OFFSET_CNTL) &&
		    (cmd[1] & RADEON_GMC_DST_PITCH_OFFSET_CNTL)) {
			/* Both present: the second one is in cmd[3] */
			if (*cmdsz < 4) {
				DRM_ERROR("Truncated packet: second offset missing\n");
				return -EINVAL;
			}
			offset = cmd[3] << 10;
			if (radeon_check_and_fixup_offset
			    (dev_priv, file_priv, &offset)) {
				DRM_ERROR("Invalid second packet offset\n");
				return -EINVAL;
			}
			cmd[3] = (cmd[3] & 0xffc00000) | (offset >> 10);
		}
		break;

	default:
		DRM_ERROR("Invalid packet type %x\n", cmd[0] & 0xff00);
		return -EINVAL;
	}

	return 0;
}
423 
424 /* ================================================================
425  * CP hardware state programming functions
426  */
427 
/*
 * Queue a clip rectangle on the ring: RADEON_RE_TOP_LEFT receives
 * (x1, y1) and RADEON_RE_WIDTH_HEIGHT receives (x2 - 1, y2 - 1), each
 * packed with the y value in the upper 16 bits.
 */
static __inline__ void radeon_emit_clip_rect(drm_radeon_private_t * dev_priv,
					     struct drm_clip_rect * box)
{
	u32 top_left, bottom_right;
	RING_LOCALS;

	DRM_DEBUG("   box:  x1=%d y1=%d  x2=%d y2=%d\n",
		  box->x1, box->y1, box->x2, box->y2);

	top_left = (box->y1 << 16) | box->x1;
	bottom_right = ((box->y2 - 1) << 16) | (box->x2 - 1);

	BEGIN_RING(4);
	OUT_RING(CP_PACKET0(RADEON_RE_TOP_LEFT, 0));
	OUT_RING(top_left);
	OUT_RING(CP_PACKET0(RADEON_RE_WIDTH_HEIGHT, 0));
	OUT_RING(bottom_right);
	ADVANCE_RING();
}
443 
444 /* Emit 1.1 state
445  */
radeon_emit_state(drm_radeon_private_t * dev_priv,struct drm_file * file_priv,drm_radeon_context_regs_t * ctx,drm_radeon_texture_regs_t * tex,unsigned int dirty)446 static int radeon_emit_state(drm_radeon_private_t * dev_priv,
447 			     struct drm_file *file_priv,
448 			     drm_radeon_context_regs_t * ctx,
449 			     drm_radeon_texture_regs_t * tex,
450 			     unsigned int dirty)
451 {
452 	RING_LOCALS;
453 	DRM_DEBUG("dirty=0x%08x\n", dirty);
454 
455 	if (dirty & RADEON_UPLOAD_CONTEXT) {
456 		if (radeon_check_and_fixup_offset(dev_priv, file_priv,
457 						  &ctx->rb3d_depthoffset)) {
458 			DRM_ERROR("Invalid depth buffer offset\n");
459 			return -EINVAL;
460 		}
461 
462 		if (radeon_check_and_fixup_offset(dev_priv, file_priv,
463 						  &ctx->rb3d_coloroffset)) {
464 			DRM_ERROR("Invalid depth buffer offset\n");
465 			return -EINVAL;
466 		}
467 
468 		BEGIN_RING(14);
469 		OUT_RING(CP_PACKET0(RADEON_PP_MISC, 6));
470 		OUT_RING(ctx->pp_misc);
471 		OUT_RING(ctx->pp_fog_color);
472 		OUT_RING(ctx->re_solid_color);
473 		OUT_RING(ctx->rb3d_blendcntl);
474 		OUT_RING(ctx->rb3d_depthoffset);
475 		OUT_RING(ctx->rb3d_depthpitch);
476 		OUT_RING(ctx->rb3d_zstencilcntl);
477 		OUT_RING(CP_PACKET0(RADEON_PP_CNTL, 2));
478 		OUT_RING(ctx->pp_cntl);
479 		OUT_RING(ctx->rb3d_cntl);
480 		OUT_RING(ctx->rb3d_coloroffset);
481 		OUT_RING(CP_PACKET0(RADEON_RB3D_COLORPITCH, 0));
482 		OUT_RING(ctx->rb3d_colorpitch);
483 		ADVANCE_RING();
484 	}
485 
486 	if (dirty & RADEON_UPLOAD_VERTFMT) {
487 		BEGIN_RING(2);
488 		OUT_RING(CP_PACKET0(RADEON_SE_COORD_FMT, 0));
489 		OUT_RING(ctx->se_coord_fmt);
490 		ADVANCE_RING();
491 	}
492 
493 	if (dirty & RADEON_UPLOAD_LINE) {
494 		BEGIN_RING(5);
495 		OUT_RING(CP_PACKET0(RADEON_RE_LINE_PATTERN, 1));
496 		OUT_RING(ctx->re_line_pattern);
497 		OUT_RING(ctx->re_line_state);
498 		OUT_RING(CP_PACKET0(RADEON_SE_LINE_WIDTH, 0));
499 		OUT_RING(ctx->se_line_width);
500 		ADVANCE_RING();
501 	}
502 
503 	if (dirty & RADEON_UPLOAD_BUMPMAP) {
504 		BEGIN_RING(5);
505 		OUT_RING(CP_PACKET0(RADEON_PP_LUM_MATRIX, 0));
506 		OUT_RING(ctx->pp_lum_matrix);
507 		OUT_RING(CP_PACKET0(RADEON_PP_ROT_MATRIX_0, 1));
508 		OUT_RING(ctx->pp_rot_matrix_0);
509 		OUT_RING(ctx->pp_rot_matrix_1);
510 		ADVANCE_RING();
511 	}
512 
513 	if (dirty & RADEON_UPLOAD_MASKS) {
514 		BEGIN_RING(4);
515 		OUT_RING(CP_PACKET0(RADEON_RB3D_STENCILREFMASK, 2));
516 		OUT_RING(ctx->rb3d_stencilrefmask);
517 		OUT_RING(ctx->rb3d_ropcntl);
518 		OUT_RING(ctx->rb3d_planemask);
519 		ADVANCE_RING();
520 	}
521 
522 	if (dirty & RADEON_UPLOAD_VIEWPORT) {
523 		BEGIN_RING(7);
524 		OUT_RING(CP_PACKET0(RADEON_SE_VPORT_XSCALE, 5));
525 		OUT_RING(ctx->se_vport_xscale);
526 		OUT_RING(ctx->se_vport_xoffset);
527 		OUT_RING(ctx->se_vport_yscale);
528 		OUT_RING(ctx->se_vport_yoffset);
529 		OUT_RING(ctx->se_vport_zscale);
530 		OUT_RING(ctx->se_vport_zoffset);
531 		ADVANCE_RING();
532 	}
533 
534 	if (dirty & RADEON_UPLOAD_SETUP) {
535 		BEGIN_RING(4);
536 		OUT_RING(CP_PACKET0(RADEON_SE_CNTL, 0));
537 		OUT_RING(ctx->se_cntl);
538 		OUT_RING(CP_PACKET0(RADEON_SE_CNTL_STATUS, 0));
539 		OUT_RING(ctx->se_cntl_status);
540 		ADVANCE_RING();
541 	}
542 
543 	if (dirty & RADEON_UPLOAD_MISC) {
544 		BEGIN_RING(2);
545 		OUT_RING(CP_PACKET0(RADEON_RE_MISC, 0));
546 		OUT_RING(ctx->re_misc);
547 		ADVANCE_RING();
548 	}
549 
550 	if (dirty & RADEON_UPLOAD_TEX0) {
551 		if (radeon_check_and_fixup_offset(dev_priv, file_priv,
552 						  &tex[0].pp_txoffset)) {
553 			DRM_ERROR("Invalid texture offset for unit 0\n");
554 			return -EINVAL;
555 		}
556 
557 		BEGIN_RING(9);
558 		OUT_RING(CP_PACKET0(RADEON_PP_TXFILTER_0, 5));
559 		OUT_RING(tex[0].pp_txfilter);
560 		OUT_RING(tex[0].pp_txformat);
561 		OUT_RING(tex[0].pp_txoffset);
562 		OUT_RING(tex[0].pp_txcblend);
563 		OUT_RING(tex[0].pp_txablend);
564 		OUT_RING(tex[0].pp_tfactor);
565 		OUT_RING(CP_PACKET0(RADEON_PP_BORDER_COLOR_0, 0));
566 		OUT_RING(tex[0].pp_border_color);
567 		ADVANCE_RING();
568 	}
569 
570 	if (dirty & RADEON_UPLOAD_TEX1) {
571 		if (radeon_check_and_fixup_offset(dev_priv, file_priv,
572 						  &tex[1].pp_txoffset)) {
573 			DRM_ERROR("Invalid texture offset for unit 1\n");
574 			return -EINVAL;
575 		}
576 
577 		BEGIN_RING(9);
578 		OUT_RING(CP_PACKET0(RADEON_PP_TXFILTER_1, 5));
579 		OUT_RING(tex[1].pp_txfilter);
580 		OUT_RING(tex[1].pp_txformat);
581 		OUT_RING(tex[1].pp_txoffset);
582 		OUT_RING(tex[1].pp_txcblend);
583 		OUT_RING(tex[1].pp_txablend);
584 		OUT_RING(tex[1].pp_tfactor);
585 		OUT_RING(CP_PACKET0(RADEON_PP_BORDER_COLOR_1, 0));
586 		OUT_RING(tex[1].pp_border_color);
587 		ADVANCE_RING();
588 	}
589 
590 	if (dirty & RADEON_UPLOAD_TEX2) {
591 		if (radeon_check_and_fixup_offset(dev_priv, file_priv,
592 						  &tex[2].pp_txoffset)) {
593 			DRM_ERROR("Invalid texture offset for unit 2\n");
594 			return -EINVAL;
595 		}
596 
597 		BEGIN_RING(9);
598 		OUT_RING(CP_PACKET0(RADEON_PP_TXFILTER_2, 5));
599 		OUT_RING(tex[2].pp_txfilter);
600 		OUT_RING(tex[2].pp_txformat);
601 		OUT_RING(tex[2].pp_txoffset);
602 		OUT_RING(tex[2].pp_txcblend);
603 		OUT_RING(tex[2].pp_txablend);
604 		OUT_RING(tex[2].pp_tfactor);
605 		OUT_RING(CP_PACKET0(RADEON_PP_BORDER_COLOR_2, 0));
606 		OUT_RING(tex[2].pp_border_color);
607 		ADVANCE_RING();
608 	}
609 
610 	return 0;
611 }
612 
613 /* Emit 1.2 state
614  */
radeon_emit_state2(drm_radeon_private_t * dev_priv,struct drm_file * file_priv,drm_radeon_state_t * state)615 static int radeon_emit_state2(drm_radeon_private_t * dev_priv,
616 			      struct drm_file *file_priv,
617 			      drm_radeon_state_t * state)
618 {
619 	RING_LOCALS;
620 
621 	if (state->dirty & RADEON_UPLOAD_ZBIAS) {
622 		BEGIN_RING(3);
623 		OUT_RING(CP_PACKET0(RADEON_SE_ZBIAS_FACTOR, 1));
624 		OUT_RING(state->context2.se_zbias_factor);
625 		OUT_RING(state->context2.se_zbias_constant);
626 		ADVANCE_RING();
627 	}
628 
629 	return radeon_emit_state(dev_priv, file_priv, &state->context,
630 				 state->tex, state->dirty);
631 }
632 
633 /* New (1.3) state mechanism.  3 commands (packet, scalar, vector) in
634  * 1.3 cmdbuffers allow all previous state to be updated as well as
635  * the tcl scalar and vector areas.
636  */
637 static struct {
638 	int start;
639 	int len;
640 	const char *name;
641 } packet[RADEON_MAX_STATE_PACKETS] = {
642 	{RADEON_PP_MISC, 7, "RADEON_PP_MISC"},
643 	{RADEON_PP_CNTL, 3, "RADEON_PP_CNTL"},
644 	{RADEON_RB3D_COLORPITCH, 1, "RADEON_RB3D_COLORPITCH"},
645 	{RADEON_RE_LINE_PATTERN, 2, "RADEON_RE_LINE_PATTERN"},
646 	{RADEON_SE_LINE_WIDTH, 1, "RADEON_SE_LINE_WIDTH"},
647 	{RADEON_PP_LUM_MATRIX, 1, "RADEON_PP_LUM_MATRIX"},
648 	{RADEON_PP_ROT_MATRIX_0, 2, "RADEON_PP_ROT_MATRIX_0"},
649 	{RADEON_RB3D_STENCILREFMASK, 3, "RADEON_RB3D_STENCILREFMASK"},
650 	{RADEON_SE_VPORT_XSCALE, 6, "RADEON_SE_VPORT_XSCALE"},
651 	{RADEON_SE_CNTL, 2, "RADEON_SE_CNTL"},
652 	{RADEON_SE_CNTL_STATUS, 1, "RADEON_SE_CNTL_STATUS"},
653 	{RADEON_RE_MISC, 1, "RADEON_RE_MISC"},
654 	{RADEON_PP_TXFILTER_0, 6, "RADEON_PP_TXFILTER_0"},
655 	{RADEON_PP_BORDER_COLOR_0, 1, "RADEON_PP_BORDER_COLOR_0"},
656 	{RADEON_PP_TXFILTER_1, 6, "RADEON_PP_TXFILTER_1"},
657 	{RADEON_PP_BORDER_COLOR_1, 1, "RADEON_PP_BORDER_COLOR_1"},
658 	{RADEON_PP_TXFILTER_2, 6, "RADEON_PP_TXFILTER_2"},
659 	{RADEON_PP_BORDER_COLOR_2, 1, "RADEON_PP_BORDER_COLOR_2"},
660 	{RADEON_SE_ZBIAS_FACTOR, 2, "RADEON_SE_ZBIAS_FACTOR"},
661 	{RADEON_SE_TCL_OUTPUT_VTX_FMT, 11, "RADEON_SE_TCL_OUTPUT_VTX_FMT"},
662 	{RADEON_SE_TCL_MATERIAL_EMMISSIVE_RED, 17,
663 		    "RADEON_SE_TCL_MATERIAL_EMMISSIVE_RED"},
664 	{R200_PP_TXCBLEND_0, 4, "R200_PP_TXCBLEND_0"},
665 	{R200_PP_TXCBLEND_1, 4, "R200_PP_TXCBLEND_1"},
666 	{R200_PP_TXCBLEND_2, 4, "R200_PP_TXCBLEND_2"},
667 	{R200_PP_TXCBLEND_3, 4, "R200_PP_TXCBLEND_3"},
668 	{R200_PP_TXCBLEND_4, 4, "R200_PP_TXCBLEND_4"},
669 	{R200_PP_TXCBLEND_5, 4, "R200_PP_TXCBLEND_5"},
670 	{R200_PP_TXCBLEND_6, 4, "R200_PP_TXCBLEND_6"},
671 	{R200_PP_TXCBLEND_7, 4, "R200_PP_TXCBLEND_7"},
672 	{R200_SE_TCL_LIGHT_MODEL_CTL_0, 6, "R200_SE_TCL_LIGHT_MODEL_CTL_0"},
673 	{R200_PP_TFACTOR_0, 6, "R200_PP_TFACTOR_0"},
674 	{R200_SE_VTX_FMT_0, 4, "R200_SE_VTX_FMT_0"},
675 	{R200_SE_VAP_CNTL, 1, "R200_SE_VAP_CNTL"},
676 	{R200_SE_TCL_MATRIX_SEL_0, 5, "R200_SE_TCL_MATRIX_SEL_0"},
677 	{R200_SE_TCL_TEX_PROC_CTL_2, 5, "R200_SE_TCL_TEX_PROC_CTL_2"},
678 	{R200_SE_TCL_UCP_VERT_BLEND_CTL, 1, "R200_SE_TCL_UCP_VERT_BLEND_CTL"},
679 	{R200_PP_TXFILTER_0, 6, "R200_PP_TXFILTER_0"},
680 	{R200_PP_TXFILTER_1, 6, "R200_PP_TXFILTER_1"},
681 	{R200_PP_TXFILTER_2, 6, "R200_PP_TXFILTER_2"},
682 	{R200_PP_TXFILTER_3, 6, "R200_PP_TXFILTER_3"},
683 	{R200_PP_TXFILTER_4, 6, "R200_PP_TXFILTER_4"},
684 	{R200_PP_TXFILTER_5, 6, "R200_PP_TXFILTER_5"},
685 	{R200_PP_TXOFFSET_0, 1, "R200_PP_TXOFFSET_0"},
686 	{R200_PP_TXOFFSET_1, 1, "R200_PP_TXOFFSET_1"},
687 	{R200_PP_TXOFFSET_2, 1, "R200_PP_TXOFFSET_2"},
688 	{R200_PP_TXOFFSET_3, 1, "R200_PP_TXOFFSET_3"},
689 	{R200_PP_TXOFFSET_4, 1, "R200_PP_TXOFFSET_4"},
690 	{R200_PP_TXOFFSET_5, 1, "R200_PP_TXOFFSET_5"},
691 	{R200_SE_VTE_CNTL, 1, "R200_SE_VTE_CNTL"},
692 	{R200_SE_TCL_OUTPUT_VTX_COMP_SEL, 1,
693 	 "R200_SE_TCL_OUTPUT_VTX_COMP_SEL"},
694 	{R200_PP_TAM_DEBUG3, 1, "R200_PP_TAM_DEBUG3"},
695 	{R200_PP_CNTL_X, 1, "R200_PP_CNTL_X"},
696 	{R200_RB3D_DEPTHXY_OFFSET, 1, "R200_RB3D_DEPTHXY_OFFSET"},
697 	{R200_RE_AUX_SCISSOR_CNTL, 1, "R200_RE_AUX_SCISSOR_CNTL"},
698 	{R200_RE_SCISSOR_TL_0, 2, "R200_RE_SCISSOR_TL_0"},
699 	{R200_RE_SCISSOR_TL_1, 2, "R200_RE_SCISSOR_TL_1"},
700 	{R200_RE_SCISSOR_TL_2, 2, "R200_RE_SCISSOR_TL_2"},
701 	{R200_SE_VAP_CNTL_STATUS, 1, "R200_SE_VAP_CNTL_STATUS"},
702 	{R200_SE_VTX_STATE_CNTL, 1, "R200_SE_VTX_STATE_CNTL"},
703 	{R200_RE_POINTSIZE, 1, "R200_RE_POINTSIZE"},
704 	{R200_SE_TCL_INPUT_VTX_VECTOR_ADDR_0, 4,
705 		    "R200_SE_TCL_INPUT_VTX_VECTOR_ADDR_0"},
706 	{R200_PP_CUBIC_FACES_0, 1, "R200_PP_CUBIC_FACES_0"},	/* 61 */
707 	{R200_PP_CUBIC_OFFSET_F1_0, 5, "R200_PP_CUBIC_OFFSET_F1_0"}, /* 62 */
708 	{R200_PP_CUBIC_FACES_1, 1, "R200_PP_CUBIC_FACES_1"},
709 	{R200_PP_CUBIC_OFFSET_F1_1, 5, "R200_PP_CUBIC_OFFSET_F1_1"},
710 	{R200_PP_CUBIC_FACES_2, 1, "R200_PP_CUBIC_FACES_2"},
711 	{R200_PP_CUBIC_OFFSET_F1_2, 5, "R200_PP_CUBIC_OFFSET_F1_2"},
712 	{R200_PP_CUBIC_FACES_3, 1, "R200_PP_CUBIC_FACES_3"},
713 	{R200_PP_CUBIC_OFFSET_F1_3, 5, "R200_PP_CUBIC_OFFSET_F1_3"},
714 	{R200_PP_CUBIC_FACES_4, 1, "R200_PP_CUBIC_FACES_4"},
715 	{R200_PP_CUBIC_OFFSET_F1_4, 5, "R200_PP_CUBIC_OFFSET_F1_4"},
716 	{R200_PP_CUBIC_FACES_5, 1, "R200_PP_CUBIC_FACES_5"},
717 	{R200_PP_CUBIC_OFFSET_F1_5, 5, "R200_PP_CUBIC_OFFSET_F1_5"},
718 	{RADEON_PP_TEX_SIZE_0, 2, "RADEON_PP_TEX_SIZE_0"},
719 	{RADEON_PP_TEX_SIZE_1, 2, "RADEON_PP_TEX_SIZE_1"},
720 	{RADEON_PP_TEX_SIZE_2, 2, "RADEON_PP_TEX_SIZE_2"},
721 	{R200_RB3D_BLENDCOLOR, 3, "R200_RB3D_BLENDCOLOR"},
722 	{R200_SE_TCL_POINT_SPRITE_CNTL, 1, "R200_SE_TCL_POINT_SPRITE_CNTL"},
723 	{RADEON_PP_CUBIC_FACES_0, 1, "RADEON_PP_CUBIC_FACES_0"},
724 	{RADEON_PP_CUBIC_OFFSET_T0_0, 5, "RADEON_PP_CUBIC_OFFSET_T0_0"},
725 	{RADEON_PP_CUBIC_FACES_1, 1, "RADEON_PP_CUBIC_FACES_1"},
726 	{RADEON_PP_CUBIC_OFFSET_T1_0, 5, "RADEON_PP_CUBIC_OFFSET_T1_0"},
727 	{RADEON_PP_CUBIC_FACES_2, 1, "RADEON_PP_CUBIC_FACES_2"},
728 	{RADEON_PP_CUBIC_OFFSET_T2_0, 5, "RADEON_PP_CUBIC_OFFSET_T2_0"},
729 	{R200_PP_TRI_PERF, 2, "R200_PP_TRI_PERF"},
730 	{R200_PP_AFS_0, 32, "R200_PP_AFS_0"},     /* 85 */
731 	{R200_PP_AFS_1, 32, "R200_PP_AFS_1"},
732 	{R200_PP_TFACTOR_0, 8, "R200_ATF_TFACTOR"},
733 	{R200_PP_TXFILTER_0, 8, "R200_PP_TXCTLALL_0"},
734 	{R200_PP_TXFILTER_1, 8, "R200_PP_TXCTLALL_1"},
735 	{R200_PP_TXFILTER_2, 8, "R200_PP_TXCTLALL_2"},
736 	{R200_PP_TXFILTER_3, 8, "R200_PP_TXCTLALL_3"},
737 	{R200_PP_TXFILTER_4, 8, "R200_PP_TXCTLALL_4"},
738 	{R200_PP_TXFILTER_5, 8, "R200_PP_TXCTLALL_5"},
739 	{R200_VAP_PVS_CNTL_1, 2, "R200_VAP_PVS_CNTL"},
740 };
741 
742 /* ================================================================
743  * Performance monitoring functions
744  */
745 
/*
 * Queue a solid-colour fill of a w x h rectangle on the ring.
 *
 * x, y:    position, relative to the origin of the first SAREA clip box
 *          (boxes[0].x1 / boxes[0].y1 are added here).
 * w, h:    size of the rectangle.
 * r, g, b: colour components; callers in this file pass values in 0..255.
 *
 * The fill targets front_pitch_offset when the SAREA's pfCurrentPage is 1
 * and back_pitch_offset otherwise.
 */
static void radeon_clear_box(drm_radeon_private_t * dev_priv,
			     int x, int y, int w, int h, int r, int g, int b)
{
	u32 color;
	RING_LOCALS;

	x += dev_priv->sarea_priv->boxes[0].x1;
	y += dev_priv->sarea_priv->boxes[0].y1;

	switch (dev_priv->color_fmt) {
	case RADEON_COLOR_FORMAT_RGB565:
		/* Keep the top 5/6/5 bits of each 8-bit component */
		color = (((r & 0xf8) << 8) |
			 ((g & 0xfc) << 3) | ((b & 0xf8) >> 3));
		break;
	case RADEON_COLOR_FORMAT_ARGB8888:
	default:
		/* Assemble in unsigned arithmetic: shifting the int constant
		 * 0xff left by 24 would move a 1 into the sign bit, which is
		 * undefined behaviour.  The resulting bit pattern is the
		 * same: alpha 0xff in the top byte, then r, g, b.
		 */
		color = ((0xffU << 24) | ((u32)r << 16) |
			 ((u32)g << 8) | (u32)b);
		break;
	}

	/* Wait for the 3D engine, then enable all bits in the write mask */
	BEGIN_RING(4);
	RADEON_WAIT_UNTIL_3D_IDLE();
	OUT_RING(CP_PACKET0(RADEON_DP_WRITE_MASK, 0));
	OUT_RING(0xffffffff);
	ADVANCE_RING();

	BEGIN_RING(6);

	OUT_RING(CP_PACKET3(RADEON_CNTL_PAINT_MULTI, 4));
	OUT_RING(RADEON_GMC_DST_PITCH_OFFSET_CNTL |
		 RADEON_GMC_BRUSH_SOLID_COLOR |
		 (dev_priv->color_fmt << 8) |
		 RADEON_GMC_SRC_DATATYPE_COLOR |
		 RADEON_ROP3_P | RADEON_GMC_CLR_CMP_CNTL_DIS);

	if (dev_priv->sarea_priv->pfCurrentPage == 1) {
		OUT_RING(dev_priv->front_pitch_offset);
	} else {
		OUT_RING(dev_priv->back_pitch_offset);
	}

	OUT_RING(color);

	/* Position and size, each packed as (first << 16) | second */
	OUT_RING((x << 16) | y);
	OUT_RING((w << 16) | h);

	ADVANCE_RING();
}
794 
/*
 * Draw the performance-monitoring boxes described by dev_priv->stats via
 * radeon_clear_box(), then reset all of dev_priv->stats to zero.
 */
static void radeon_cp_performance_boxes(drm_radeon_private_t *dev_priv)
{
	/* Collapse various things into a single wait flag -- this is only a
	 * guess at whether userspace slept; it would be better to have
	 * userspace tell us.  Frame reads, clear reads exceeding the number
	 * of clears, and freelist loops all count as having waited.
	 */
	if (dev_priv->stats.last_frame_reads > 1 ||
	    dev_priv->stats.last_clear_reads > dev_priv->stats.clears ||
	    dev_priv->stats.freelist_loops) {
		dev_priv->stats.boxes |= RADEON_BOX_WAIT_IDLE;
	}

	/* Purple box: page flipping */
	if (dev_priv->stats.boxes & RADEON_BOX_FLIP) {
		radeon_clear_box(dev_priv, 4, 4, 8, 8, 255, 0, 255);
	}

	/* Red box: we had to wait for idle at some point */
	if (dev_priv->stats.boxes & RADEON_BOX_WAIT_IDLE) {
		radeon_clear_box(dev_priv, 16, 4, 8, 8, 255, 0, 0);
	}

	/* Blue box: lost context?  (not drawn) */

	/* Yellow box: texture swaps */
	if (dev_priv->stats.boxes & RADEON_BOX_TEXTURE_LOAD) {
		radeon_clear_box(dev_priv, 40, 4, 8, 8, 255, 255, 0);
	}

	/* Green box: the hardware never idled (as far as we can tell) */
	if ((dev_priv->stats.boxes & RADEON_BOX_DMA_IDLE) == 0) {
		radeon_clear_box(dev_priv, 64, 4, 8, 8, 0, 255, 0);
	}

	/* Bar whose width is the number of buffers requested, capped at
	 * 100 (not a great measure, easily confused).
	 */
	if (dev_priv->stats.requested_bufs) {
		if (dev_priv->stats.requested_bufs > 100) {
			dev_priv->stats.requested_bufs = 100;
		}

		radeon_clear_box(dev_priv, 4, 16,
				 dev_priv->stats.requested_bufs, 4,
				 196, 128, 128);
	}

	/* Start the next measurement period from a clean slate */
	memset(&dev_priv->stats, 0, sizeof(dev_priv->stats));
}
847 
848 /* ================================================================
849  * CP command dispatch functions
850  */
851 
radeon_cp_dispatch_clear(struct drm_device * dev,drm_radeon_clear_t * clear,drm_radeon_clear_rect_t * depth_boxes)852 static void radeon_cp_dispatch_clear(struct drm_device * dev,
853 				     drm_radeon_clear_t * clear,
854 				     drm_radeon_clear_rect_t * depth_boxes)
855 {
856 	drm_radeon_private_t *dev_priv = dev->dev_private;
857 	drm_radeon_sarea_t *sarea_priv = dev_priv->sarea_priv;
858 	drm_radeon_depth_clear_t *depth_clear = &dev_priv->depth_clear;
859 	int nbox = sarea_priv->nbox;
860 	struct drm_clip_rect *pbox = sarea_priv->boxes;
861 	unsigned int flags = clear->flags;
862 	u32 rb3d_cntl = 0, rb3d_stencilrefmask = 0;
863 	int i;
864 	RING_LOCALS;
865 	DRM_DEBUG("flags = 0x%x\n", flags);
866 
867 	dev_priv->stats.clears++;
868 
869 	if (sarea_priv->pfCurrentPage == 1) {
870 		unsigned int tmp = flags;
871 
872 		flags &= ~(RADEON_FRONT | RADEON_BACK);
873 		if (tmp & RADEON_FRONT)
874 			flags |= RADEON_BACK;
875 		if (tmp & RADEON_BACK)
876 			flags |= RADEON_FRONT;
877 	}
878 
879 	if (flags & (RADEON_FRONT | RADEON_BACK)) {
880 
881 		BEGIN_RING(4);
882 
883 		/* Ensure the 3D stream is idle before doing a
884 		 * 2D fill to clear the front or back buffer.
885 		 */
886 		RADEON_WAIT_UNTIL_3D_IDLE();
887 
888 		OUT_RING(CP_PACKET0(RADEON_DP_WRITE_MASK, 0));
889 		OUT_RING(clear->color_mask);
890 
891 		ADVANCE_RING();
892 
893 		/* Make sure we restore the 3D state next time.
894 		 */
895 		sarea_priv->ctx_owner = 0;
896 
897 		for (i = 0; i < nbox; i++) {
898 			int x = pbox[i].x1;
899 			int y = pbox[i].y1;
900 			int w = pbox[i].x2 - x;
901 			int h = pbox[i].y2 - y;
902 
903 			DRM_DEBUG("%d,%d-%d,%d flags 0x%x\n",
904 				  x, y, w, h, flags);
905 
906 			if (flags & RADEON_FRONT) {
907 				BEGIN_RING(6);
908 
909 				OUT_RING(CP_PACKET3
910 					 (RADEON_CNTL_PAINT_MULTI, 4));
911 				OUT_RING(RADEON_GMC_DST_PITCH_OFFSET_CNTL |
912 					 RADEON_GMC_BRUSH_SOLID_COLOR |
913 					 (dev_priv->
914 					  color_fmt << 8) |
915 					 RADEON_GMC_SRC_DATATYPE_COLOR |
916 					 RADEON_ROP3_P |
917 					 RADEON_GMC_CLR_CMP_CNTL_DIS);
918 
919 				OUT_RING(dev_priv->front_pitch_offset);
920 				OUT_RING(clear->clear_color);
921 
922 				OUT_RING((x << 16) | y);
923 				OUT_RING((w << 16) | h);
924 
925 				ADVANCE_RING();
926 			}
927 
928 			if (flags & RADEON_BACK) {
929 				BEGIN_RING(6);
930 
931 				OUT_RING(CP_PACKET3
932 					 (RADEON_CNTL_PAINT_MULTI, 4));
933 				OUT_RING(RADEON_GMC_DST_PITCH_OFFSET_CNTL |
934 					 RADEON_GMC_BRUSH_SOLID_COLOR |
935 					 (dev_priv->
936 					  color_fmt << 8) |
937 					 RADEON_GMC_SRC_DATATYPE_COLOR |
938 					 RADEON_ROP3_P |
939 					 RADEON_GMC_CLR_CMP_CNTL_DIS);
940 
941 				OUT_RING(dev_priv->back_pitch_offset);
942 				OUT_RING(clear->clear_color);
943 
944 				OUT_RING((x << 16) | y);
945 				OUT_RING((w << 16) | h);
946 
947 				ADVANCE_RING();
948 			}
949 		}
950 	}
951 
952 	/* hyper z clear */
953 	/* no docs available, based on reverse engeneering by Stephane Marchesin */
954 	if ((flags & (RADEON_DEPTH | RADEON_STENCIL))
955 	    && (flags & RADEON_CLEAR_FASTZ)) {
956 
957 		int i;
958 		int depthpixperline =
959 		    dev_priv->depth_fmt ==
960 		    RADEON_DEPTH_FORMAT_16BIT_INT_Z ? (dev_priv->depth_pitch /
961 						       2) : (dev_priv->
962 							     depth_pitch / 4);
963 
964 		u32 clearmask;
965 
966 		u32 tempRB3D_DEPTHCLEARVALUE = clear->clear_depth |
967 		    ((clear->depth_mask & 0xff) << 24);
968 
969 		/* Make sure we restore the 3D state next time.
970 		 * we haven't touched any "normal" state - still need this?
971 		 */
972 		sarea_priv->ctx_owner = 0;
973 
974 		if ((dev_priv->flags & RADEON_HAS_HIERZ)
975 		    && (flags & RADEON_USE_HIERZ)) {
976 			/* FIXME : reverse engineer that for Rx00 cards */
977 			/* FIXME : the mask supposedly contains low-res z values. So can't set
978 			   just to the max (0xff? or actually 0x3fff?), need to take z clear
979 			   value into account? */
980 			/* pattern seems to work for r100, though get slight
981 			   rendering errors with glxgears. If hierz is not enabled for r100,
982 			   only 4 bits which indicate clear (15,16,31,32, all zero) matter, the
983 			   other ones are ignored, and the same clear mask can be used. That's
984 			   very different behaviour than R200 which needs different clear mask
985 			   and different number of tiles to clear if hierz is enabled or not !?!
986 			 */
987 			clearmask = (0xff << 22) | (0xff << 6) | 0x003f003f;
988 		} else {
989 			/* clear mask : chooses the clearing pattern.
990 			   rv250: could be used to clear only parts of macrotiles
991 			   (but that would get really complicated...)?
992 			   bit 0 and 1 (either or both of them ?!?!) are used to
993 			   not clear tile (or maybe one of the bits indicates if the tile is
994 			   compressed or not), bit 2 and 3 to not clear tile 1,...,.
995 			   Pattern is as follows:
996 			   | 0,1 | 4,5 | 8,9 |12,13|16,17|20,21|24,25|28,29|
997 			   bits -------------------------------------------------
998 			   | 2,3 | 6,7 |10,11|14,15|18,19|22,23|26,27|30,31|
999 			   rv100: clearmask covers 2x8 4x1 tiles, but one clear still
1000 			   covers 256 pixels ?!?
1001 			 */
1002 			clearmask = 0x0;
1003 		}
1004 
1005 		BEGIN_RING(8);
1006 		RADEON_WAIT_UNTIL_2D_IDLE();
1007 		OUT_RING_REG(RADEON_RB3D_DEPTHCLEARVALUE,
1008 			     tempRB3D_DEPTHCLEARVALUE);
1009 		/* what offset is this exactly ? */
1010 		OUT_RING_REG(RADEON_RB3D_ZMASKOFFSET, 0);
1011 		/* need ctlstat, otherwise get some strange black flickering */
1012 		OUT_RING_REG(RADEON_RB3D_ZCACHE_CTLSTAT,
1013 			     RADEON_RB3D_ZC_FLUSH_ALL);
1014 		ADVANCE_RING();
1015 
1016 		for (i = 0; i < nbox; i++) {
1017 			int tileoffset, nrtilesx, nrtilesy, j;
1018 			/* it looks like r200 needs rv-style clears, at least if hierz is not enabled? */
1019 			if ((dev_priv->flags & RADEON_HAS_HIERZ)
1020 			    && !(dev_priv->microcode_version == UCODE_R200)) {
1021 				/* FIXME : figure this out for r200 (when hierz is enabled). Or
1022 				   maybe r200 actually doesn't need to put the low-res z value into
1023 				   the tile cache like r100, but just needs to clear the hi-level z-buffer?
1024 				   Works for R100, both with hierz and without.
1025 				   R100 seems to operate on 2x1 8x8 tiles, but...
1026 				   odd: offset/nrtiles need to be 64 pix (4 block) aligned? Potentially
1027 				   problematic with resolutions which are not 64 pix aligned? */
1028 				tileoffset =
1029 				    ((pbox[i].y1 >> 3) * depthpixperline +
1030 				     pbox[i].x1) >> 6;
1031 				nrtilesx =
1032 				    ((pbox[i].x2 & ~63) -
1033 				     (pbox[i].x1 & ~63)) >> 4;
1034 				nrtilesy =
1035 				    (pbox[i].y2 >> 3) - (pbox[i].y1 >> 3);
1036 				for (j = 0; j <= nrtilesy; j++) {
1037 					BEGIN_RING(4);
1038 					OUT_RING(CP_PACKET3
1039 						 (RADEON_3D_CLEAR_ZMASK, 2));
1040 					/* first tile */
1041 					OUT_RING(tileoffset * 8);
1042 					/* the number of tiles to clear */
1043 					OUT_RING(nrtilesx + 4);
1044 					/* clear mask : chooses the clearing pattern. */
1045 					OUT_RING(clearmask);
1046 					ADVANCE_RING();
1047 					tileoffset += depthpixperline >> 6;
1048 				}
1049 			} else if (dev_priv->microcode_version == UCODE_R200) {
1050 				/* works for rv250. */
1051 				/* find first macro tile (8x2 4x4 z-pixels on rv250) */
1052 				tileoffset =
1053 				    ((pbox[i].y1 >> 3) * depthpixperline +
1054 				     pbox[i].x1) >> 5;
1055 				nrtilesx =
1056 				    (pbox[i].x2 >> 5) - (pbox[i].x1 >> 5);
1057 				nrtilesy =
1058 				    (pbox[i].y2 >> 3) - (pbox[i].y1 >> 3);
1059 				for (j = 0; j <= nrtilesy; j++) {
1060 					BEGIN_RING(4);
1061 					OUT_RING(CP_PACKET3
1062 						 (RADEON_3D_CLEAR_ZMASK, 2));
1063 					/* first tile */
1064 					/* judging by the first tile offset needed, could possibly
1065 					   directly address/clear 4x4 tiles instead of 8x2 * 4x4
1066 					   macro tiles, though would still need clear mask for
1067 					   right/bottom if truely 4x4 granularity is desired ? */
1068 					OUT_RING(tileoffset * 16);
1069 					/* the number of tiles to clear */
1070 					OUT_RING(nrtilesx + 1);
1071 					/* clear mask : chooses the clearing pattern. */
1072 					OUT_RING(clearmask);
1073 					ADVANCE_RING();
1074 					tileoffset += depthpixperline >> 5;
1075 				}
1076 			} else {	/* rv 100 */
1077 				/* rv100 might not need 64 pix alignment, who knows */
1078 				/* offsets are, hmm, weird */
1079 				tileoffset =
1080 				    ((pbox[i].y1 >> 4) * depthpixperline +
1081 				     pbox[i].x1) >> 6;
1082 				nrtilesx =
1083 				    ((pbox[i].x2 & ~63) -
1084 				     (pbox[i].x1 & ~63)) >> 4;
1085 				nrtilesy =
1086 				    (pbox[i].y2 >> 4) - (pbox[i].y1 >> 4);
1087 				for (j = 0; j <= nrtilesy; j++) {
1088 					BEGIN_RING(4);
1089 					OUT_RING(CP_PACKET3
1090 						 (RADEON_3D_CLEAR_ZMASK, 2));
1091 					OUT_RING(tileoffset * 128);
1092 					/* the number of tiles to clear */
1093 					OUT_RING(nrtilesx + 4);
1094 					/* clear mask : chooses the clearing pattern. */
1095 					OUT_RING(clearmask);
1096 					ADVANCE_RING();
1097 					tileoffset += depthpixperline >> 6;
1098 				}
1099 			}
1100 		}
1101 
1102 		/* TODO don't always clear all hi-level z tiles */
1103 		if ((dev_priv->flags & RADEON_HAS_HIERZ)
1104 		    && (dev_priv->microcode_version == UCODE_R200)
1105 		    && (flags & RADEON_USE_HIERZ))
1106 			/* r100 and cards without hierarchical z-buffer have no high-level z-buffer */
1107 			/* FIXME : the mask supposedly contains low-res z values. So can't set
1108 			   just to the max (0xff? or actually 0x3fff?), need to take z clear
1109 			   value into account? */
1110 		{
1111 			BEGIN_RING(4);
1112 			OUT_RING(CP_PACKET3(RADEON_3D_CLEAR_HIZ, 2));
1113 			OUT_RING(0x0);	/* First tile */
1114 			OUT_RING(0x3cc0);
1115 			OUT_RING((0xff << 22) | (0xff << 6) | 0x003f003f);
1116 			ADVANCE_RING();
1117 		}
1118 	}
1119 
1120 	/* We have to clear the depth and/or stencil buffers by
1121 	 * rendering a quad into just those buffers.  Thus, we have to
1122 	 * make sure the 3D engine is configured correctly.
1123 	 */
1124 	else if ((dev_priv->microcode_version == UCODE_R200) &&
1125 		(flags & (RADEON_DEPTH | RADEON_STENCIL))) {
1126 
1127 		int tempPP_CNTL;
1128 		int tempRE_CNTL;
1129 		int tempRB3D_CNTL;
1130 		int tempRB3D_ZSTENCILCNTL;
1131 		int tempRB3D_STENCILREFMASK;
1132 		int tempRB3D_PLANEMASK;
1133 		int tempSE_CNTL;
1134 		int tempSE_VTE_CNTL;
1135 		int tempSE_VTX_FMT_0;
1136 		int tempSE_VTX_FMT_1;
1137 		int tempSE_VAP_CNTL;
1138 		int tempRE_AUX_SCISSOR_CNTL;
1139 
1140 		tempPP_CNTL = 0;
1141 		tempRE_CNTL = 0;
1142 
1143 		tempRB3D_CNTL = depth_clear->rb3d_cntl;
1144 
1145 		tempRB3D_ZSTENCILCNTL = depth_clear->rb3d_zstencilcntl;
1146 		tempRB3D_STENCILREFMASK = 0x0;
1147 
1148 		tempSE_CNTL = depth_clear->se_cntl;
1149 
1150 		/* Disable TCL */
1151 
1152 		tempSE_VAP_CNTL = (	/* SE_VAP_CNTL__FORCE_W_TO_ONE_MASK |  */
1153 					  (0x9 <<
1154 					   SE_VAP_CNTL__VF_MAX_VTX_NUM__SHIFT));
1155 
1156 		tempRB3D_PLANEMASK = 0x0;
1157 
1158 		tempRE_AUX_SCISSOR_CNTL = 0x0;
1159 
1160 		tempSE_VTE_CNTL =
1161 		    SE_VTE_CNTL__VTX_XY_FMT_MASK | SE_VTE_CNTL__VTX_Z_FMT_MASK;
1162 
1163 		/* Vertex format (X, Y, Z, W) */
1164 		tempSE_VTX_FMT_0 =
1165 		    SE_VTX_FMT_0__VTX_Z0_PRESENT_MASK |
1166 		    SE_VTX_FMT_0__VTX_W0_PRESENT_MASK;
1167 		tempSE_VTX_FMT_1 = 0x0;
1168 
1169 		/*
1170 		 * Depth buffer specific enables
1171 		 */
1172 		if (flags & RADEON_DEPTH) {
1173 			/* Enable depth buffer */
1174 			tempRB3D_CNTL |= RADEON_Z_ENABLE;
1175 		} else {
1176 			/* Disable depth buffer */
1177 			tempRB3D_CNTL &= ~RADEON_Z_ENABLE;
1178 		}
1179 
1180 		/*
1181 		 * Stencil buffer specific enables
1182 		 */
1183 		if (flags & RADEON_STENCIL) {
1184 			tempRB3D_CNTL |= RADEON_STENCIL_ENABLE;
1185 			tempRB3D_STENCILREFMASK = clear->depth_mask;
1186 		} else {
1187 			tempRB3D_CNTL &= ~RADEON_STENCIL_ENABLE;
1188 			tempRB3D_STENCILREFMASK = 0x00000000;
1189 		}
1190 
1191 		if (flags & RADEON_USE_COMP_ZBUF) {
1192 			tempRB3D_ZSTENCILCNTL |= RADEON_Z_COMPRESSION_ENABLE |
1193 			    RADEON_Z_DECOMPRESSION_ENABLE;
1194 		}
1195 		if (flags & RADEON_USE_HIERZ) {
1196 			tempRB3D_ZSTENCILCNTL |= RADEON_Z_HIERARCHY_ENABLE;
1197 		}
1198 
1199 		BEGIN_RING(26);
1200 		RADEON_WAIT_UNTIL_2D_IDLE();
1201 
1202 		OUT_RING_REG(RADEON_PP_CNTL, tempPP_CNTL);
1203 		OUT_RING_REG(R200_RE_CNTL, tempRE_CNTL);
1204 		OUT_RING_REG(RADEON_RB3D_CNTL, tempRB3D_CNTL);
1205 		OUT_RING_REG(RADEON_RB3D_ZSTENCILCNTL, tempRB3D_ZSTENCILCNTL);
1206 		OUT_RING_REG(RADEON_RB3D_STENCILREFMASK,
1207 			     tempRB3D_STENCILREFMASK);
1208 		OUT_RING_REG(RADEON_RB3D_PLANEMASK, tempRB3D_PLANEMASK);
1209 		OUT_RING_REG(RADEON_SE_CNTL, tempSE_CNTL);
1210 		OUT_RING_REG(R200_SE_VTE_CNTL, tempSE_VTE_CNTL);
1211 		OUT_RING_REG(R200_SE_VTX_FMT_0, tempSE_VTX_FMT_0);
1212 		OUT_RING_REG(R200_SE_VTX_FMT_1, tempSE_VTX_FMT_1);
1213 		OUT_RING_REG(R200_SE_VAP_CNTL, tempSE_VAP_CNTL);
1214 		OUT_RING_REG(R200_RE_AUX_SCISSOR_CNTL, tempRE_AUX_SCISSOR_CNTL);
1215 		ADVANCE_RING();
1216 
1217 		/* Make sure we restore the 3D state next time.
1218 		 */
1219 		sarea_priv->ctx_owner = 0;
1220 
1221 		for (i = 0; i < nbox; i++) {
1222 
1223 			/* Funny that this should be required --
1224 			 *  sets top-left?
1225 			 */
1226 			radeon_emit_clip_rect(dev_priv, &sarea_priv->boxes[i]);
1227 
1228 			BEGIN_RING(14);
1229 			OUT_RING(CP_PACKET3(R200_3D_DRAW_IMMD_2, 12));
1230 			OUT_RING((RADEON_PRIM_TYPE_RECT_LIST |
1231 				  RADEON_PRIM_WALK_RING |
1232 				  (3 << RADEON_NUM_VERTICES_SHIFT)));
1233 			OUT_RING(depth_boxes[i].ui[CLEAR_X1]);
1234 			OUT_RING(depth_boxes[i].ui[CLEAR_Y1]);
1235 			OUT_RING(depth_boxes[i].ui[CLEAR_DEPTH]);
1236 			OUT_RING(0x3f800000);
1237 			OUT_RING(depth_boxes[i].ui[CLEAR_X1]);
1238 			OUT_RING(depth_boxes[i].ui[CLEAR_Y2]);
1239 			OUT_RING(depth_boxes[i].ui[CLEAR_DEPTH]);
1240 			OUT_RING(0x3f800000);
1241 			OUT_RING(depth_boxes[i].ui[CLEAR_X2]);
1242 			OUT_RING(depth_boxes[i].ui[CLEAR_Y2]);
1243 			OUT_RING(depth_boxes[i].ui[CLEAR_DEPTH]);
1244 			OUT_RING(0x3f800000);
1245 			ADVANCE_RING();
1246 		}
1247 	} else if ((flags & (RADEON_DEPTH | RADEON_STENCIL))) {
1248 
1249 		int tempRB3D_ZSTENCILCNTL = depth_clear->rb3d_zstencilcntl;
1250 
1251 		rb3d_cntl = depth_clear->rb3d_cntl;
1252 
1253 		if (flags & RADEON_DEPTH) {
1254 			rb3d_cntl |= RADEON_Z_ENABLE;
1255 		} else {
1256 			rb3d_cntl &= ~RADEON_Z_ENABLE;
1257 		}
1258 
1259 		if (flags & RADEON_STENCIL) {
1260 			rb3d_cntl |= RADEON_STENCIL_ENABLE;
1261 			rb3d_stencilrefmask = clear->depth_mask;	/* misnamed field */
1262 		} else {
1263 			rb3d_cntl &= ~RADEON_STENCIL_ENABLE;
1264 			rb3d_stencilrefmask = 0x00000000;
1265 		}
1266 
1267 		if (flags & RADEON_USE_COMP_ZBUF) {
1268 			tempRB3D_ZSTENCILCNTL |= RADEON_Z_COMPRESSION_ENABLE |
1269 			    RADEON_Z_DECOMPRESSION_ENABLE;
1270 		}
1271 		if (flags & RADEON_USE_HIERZ) {
1272 			tempRB3D_ZSTENCILCNTL |= RADEON_Z_HIERARCHY_ENABLE;
1273 		}
1274 
1275 		BEGIN_RING(13);
1276 		RADEON_WAIT_UNTIL_2D_IDLE();
1277 
1278 		OUT_RING(CP_PACKET0(RADEON_PP_CNTL, 1));
1279 		OUT_RING(0x00000000);
1280 		OUT_RING(rb3d_cntl);
1281 
1282 		OUT_RING_REG(RADEON_RB3D_ZSTENCILCNTL, tempRB3D_ZSTENCILCNTL);
1283 		OUT_RING_REG(RADEON_RB3D_STENCILREFMASK, rb3d_stencilrefmask);
1284 		OUT_RING_REG(RADEON_RB3D_PLANEMASK, 0x00000000);
1285 		OUT_RING_REG(RADEON_SE_CNTL, depth_clear->se_cntl);
1286 		ADVANCE_RING();
1287 
1288 		/* Make sure we restore the 3D state next time.
1289 		 */
1290 		sarea_priv->ctx_owner = 0;
1291 
1292 		for (i = 0; i < nbox; i++) {
1293 
1294 			/* Funny that this should be required --
1295 			 *  sets top-left?
1296 			 */
1297 			radeon_emit_clip_rect(dev_priv, &sarea_priv->boxes[i]);
1298 
1299 			BEGIN_RING(15);
1300 
1301 			OUT_RING(CP_PACKET3(RADEON_3D_DRAW_IMMD, 13));
1302 			OUT_RING(RADEON_VTX_Z_PRESENT |
1303 				 RADEON_VTX_PKCOLOR_PRESENT);
1304 			OUT_RING((RADEON_PRIM_TYPE_RECT_LIST |
1305 				  RADEON_PRIM_WALK_RING |
1306 				  RADEON_MAOS_ENABLE |
1307 				  RADEON_VTX_FMT_RADEON_MODE |
1308 				  (3 << RADEON_NUM_VERTICES_SHIFT)));
1309 
1310 			OUT_RING(depth_boxes[i].ui[CLEAR_X1]);
1311 			OUT_RING(depth_boxes[i].ui[CLEAR_Y1]);
1312 			OUT_RING(depth_boxes[i].ui[CLEAR_DEPTH]);
1313 			OUT_RING(0x0);
1314 
1315 			OUT_RING(depth_boxes[i].ui[CLEAR_X1]);
1316 			OUT_RING(depth_boxes[i].ui[CLEAR_Y2]);
1317 			OUT_RING(depth_boxes[i].ui[CLEAR_DEPTH]);
1318 			OUT_RING(0x0);
1319 
1320 			OUT_RING(depth_boxes[i].ui[CLEAR_X2]);
1321 			OUT_RING(depth_boxes[i].ui[CLEAR_Y2]);
1322 			OUT_RING(depth_boxes[i].ui[CLEAR_DEPTH]);
1323 			OUT_RING(0x0);
1324 
1325 			ADVANCE_RING();
1326 		}
1327 	}
1328 
1329 	/* Increment the clear counter.  The client-side 3D driver must
1330 	 * wait on this value before performing the clear ioctl.  We
1331 	 * need this because the card's so damned fast...
1332 	 */
1333 	sarea_priv->last_clear++;
1334 
1335 	BEGIN_RING(4);
1336 
1337 	RADEON_CLEAR_AGE(sarea_priv->last_clear);
1338 	RADEON_WAIT_UNTIL_IDLE();
1339 
1340 	ADVANCE_RING();
1341 }
1342 
radeon_cp_dispatch_swap(struct drm_device * dev)1343 static void radeon_cp_dispatch_swap(struct drm_device *dev)
1344 {
1345 	drm_radeon_private_t *dev_priv = dev->dev_private;
1346 	drm_radeon_sarea_t *sarea_priv = dev_priv->sarea_priv;
1347 	int nbox = sarea_priv->nbox;
1348 	struct drm_clip_rect *pbox = sarea_priv->boxes;
1349 	int i;
1350 	RING_LOCALS;
1351 	DRM_DEBUG("\n");
1352 
1353 	/* Do some trivial performance monitoring...
1354 	 */
1355 	if (dev_priv->do_boxes)
1356 		radeon_cp_performance_boxes(dev_priv);
1357 
1358 	/* Wait for the 3D stream to idle before dispatching the bitblt.
1359 	 * This will prevent data corruption between the two streams.
1360 	 */
1361 	BEGIN_RING(2);
1362 
1363 	RADEON_WAIT_UNTIL_3D_IDLE();
1364 
1365 	ADVANCE_RING();
1366 
1367 	for (i = 0; i < nbox; i++) {
1368 		int x = pbox[i].x1;
1369 		int y = pbox[i].y1;
1370 		int w = pbox[i].x2 - x;
1371 		int h = pbox[i].y2 - y;
1372 
1373 		DRM_DEBUG("%d,%d-%d,%d\n", x, y, w, h);
1374 
1375 		BEGIN_RING(9);
1376 
1377 		OUT_RING(CP_PACKET0(RADEON_DP_GUI_MASTER_CNTL, 0));
1378 		OUT_RING(RADEON_GMC_SRC_PITCH_OFFSET_CNTL |
1379 			 RADEON_GMC_DST_PITCH_OFFSET_CNTL |
1380 			 RADEON_GMC_BRUSH_NONE |
1381 			 (dev_priv->color_fmt << 8) |
1382 			 RADEON_GMC_SRC_DATATYPE_COLOR |
1383 			 RADEON_ROP3_S |
1384 			 RADEON_DP_SRC_SOURCE_MEMORY |
1385 			 RADEON_GMC_CLR_CMP_CNTL_DIS | RADEON_GMC_WR_MSK_DIS);
1386 
1387 		/* Make this work even if front & back are flipped:
1388 		 */
1389 		OUT_RING(CP_PACKET0(RADEON_SRC_PITCH_OFFSET, 1));
1390 		if (sarea_priv->pfCurrentPage == 0) {
1391 			OUT_RING(dev_priv->back_pitch_offset);
1392 			OUT_RING(dev_priv->front_pitch_offset);
1393 		} else {
1394 			OUT_RING(dev_priv->front_pitch_offset);
1395 			OUT_RING(dev_priv->back_pitch_offset);
1396 		}
1397 
1398 		OUT_RING(CP_PACKET0(RADEON_SRC_X_Y, 2));
1399 		OUT_RING((x << 16) | y);
1400 		OUT_RING((x << 16) | y);
1401 		OUT_RING((w << 16) | h);
1402 
1403 		ADVANCE_RING();
1404 	}
1405 
1406 	/* Increment the frame counter.  The client-side 3D driver must
1407 	 * throttle the framerate by waiting for this value before
1408 	 * performing the swapbuffer ioctl.
1409 	 */
1410 	sarea_priv->last_frame++;
1411 
1412 	BEGIN_RING(4);
1413 
1414 	RADEON_FRAME_AGE(sarea_priv->last_frame);
1415 	RADEON_WAIT_UNTIL_2D_IDLE();
1416 
1417 	ADVANCE_RING();
1418 }
1419 
radeon_cp_dispatch_flip(struct drm_device * dev)1420 static void radeon_cp_dispatch_flip(struct drm_device *dev)
1421 {
1422 	drm_radeon_private_t *dev_priv = dev->dev_private;
1423 	struct drm_sarea *sarea = (struct drm_sarea *)dev_priv->sarea->virtual;
1424 	int offset = (dev_priv->sarea_priv->pfCurrentPage == 1)
1425 	    ? dev_priv->front_offset : dev_priv->back_offset;
1426 	RING_LOCALS;
1427 	DRM_DEBUG("pfCurrentPage=%d\n",
1428 		  dev_priv->sarea_priv->pfCurrentPage);
1429 
1430 	/* Do some trivial performance monitoring...
1431 	 */
1432 	if (dev_priv->do_boxes) {
1433 		dev_priv->stats.boxes |= RADEON_BOX_FLIP;
1434 		radeon_cp_performance_boxes(dev_priv);
1435 	}
1436 
1437 	/* Update the frame offsets for both CRTCs
1438 	 */
1439 	BEGIN_RING(6);
1440 
1441 	RADEON_WAIT_UNTIL_3D_IDLE();
1442 	OUT_RING_REG(RADEON_CRTC_OFFSET,
1443 		     ((sarea->frame.y * dev_priv->front_pitch +
1444 		       sarea->frame.x * (dev_priv->color_fmt - 2)) & ~7)
1445 		     + offset);
1446 	OUT_RING_REG(RADEON_CRTC2_OFFSET, dev_priv->sarea_priv->crtc2_base
1447 		     + offset);
1448 
1449 	ADVANCE_RING();
1450 
1451 	/* Increment the frame counter.  The client-side 3D driver must
1452 	 * throttle the framerate by waiting for this value before
1453 	 * performing the swapbuffer ioctl.
1454 	 */
1455 	dev_priv->sarea_priv->last_frame++;
1456 	dev_priv->sarea_priv->pfCurrentPage =
1457 		1 - dev_priv->sarea_priv->pfCurrentPage;
1458 
1459 	BEGIN_RING(2);
1460 
1461 	RADEON_FRAME_AGE(dev_priv->sarea_priv->last_frame);
1462 
1463 	ADVANCE_RING();
1464 }
1465 
bad_prim_vertex_nr(int primitive,int nr)1466 static int bad_prim_vertex_nr(int primitive, int nr)
1467 {
1468 	switch (primitive & RADEON_PRIM_TYPE_MASK) {
1469 	case RADEON_PRIM_TYPE_NONE:
1470 	case RADEON_PRIM_TYPE_POINT:
1471 		return nr < 1;
1472 	case RADEON_PRIM_TYPE_LINE:
1473 		return (nr & 1) || nr == 0;
1474 	case RADEON_PRIM_TYPE_LINE_STRIP:
1475 		return nr < 2;
1476 	case RADEON_PRIM_TYPE_TRI_LIST:
1477 	case RADEON_PRIM_TYPE_3VRT_POINT_LIST:
1478 	case RADEON_PRIM_TYPE_3VRT_LINE_LIST:
1479 	case RADEON_PRIM_TYPE_RECT_LIST:
1480 		return nr % 3 || nr == 0;
1481 	case RADEON_PRIM_TYPE_TRI_FAN:
1482 	case RADEON_PRIM_TYPE_TRI_STRIP:
1483 		return nr < 3;
1484 	default:
1485 		return 1;
1486 	}
1487 }
1488 
/*
 * Description of one primitive handed to the vertex/index dispatch
 * helpers in this file.
 */
typedef struct {
	unsigned int start;	/* byte offset of the primitive's data within the buffer */
	unsigned int finish;	/* byte offset just past the primitive's data */
	unsigned int prim;	/* hardware primitive word; type is in RADEON_PRIM_TYPE_MASK */
	unsigned int numverts;	/* vertex count written into the draw packet */
	unsigned int offset;	/* added to gart_buffers_offset for indexed draws */
	unsigned int vc_format;	/* vertex format word written into the draw packet */
} drm_radeon_tcl_prim_t;
1497 
/*
 * Dispatch a non-indexed primitive stored in a vertex buffer.
 *
 * The draw packet is emitted once per SAREA cliprect, each time preceded
 * by that cliprect; with no cliprects at all it is still emitted exactly
 * once.  A primitive whose vertex count fails bad_prim_vertex_nr() is
 * rejected with an error message and nothing is emitted.
 */
static void radeon_cp_dispatch_vertex(struct drm_device * dev,
				      struct drm_buf * buf,
				      drm_radeon_tcl_prim_t * prim)
{
	drm_radeon_private_t *dev_priv = dev->dev_private;
	drm_radeon_sarea_t *sarea_priv = dev_priv->sarea_priv;
	int vtx_offset = dev_priv->gart_buffers_offset + buf->offset + prim->start;
	int nverts = (int)prim->numverts;
	int ncliprects = sarea_priv->nbox;
	int box;
	RING_LOCALS;

	DRM_DEBUG("hwprim 0x%x vfmt 0x%x %d..%d %d verts\n",
		  prim->prim,
		  prim->vc_format, prim->start, prim->finish, prim->numverts);

	if (bad_prim_vertex_nr(prim->prim, prim->numverts)) {
		DRM_ERROR("bad prim %x numverts %d\n",
			  prim->prim, prim->numverts);
		return;
	}

	box = 0;
	do {
		/* Select the next cliprect, if there is one. */
		if (box < ncliprects)
			radeon_emit_clip_rect(dev_priv,
					      &sarea_priv->boxes[box]);

		/* Vertex buffer rendering command. */
		BEGIN_RING(5);

		OUT_RING(CP_PACKET3(RADEON_3D_RNDR_GEN_INDX_PRIM, 3));
		OUT_RING(vtx_offset);
		OUT_RING(nverts);
		OUT_RING(prim->vc_format);
		OUT_RING(prim->prim | RADEON_PRIM_WALK_LIST |
			 RADEON_COLOR_ORDER_RGBA |
			 RADEON_VTX_FMT_RADEON_MODE |
			 (nverts << RADEON_NUM_VERTICES_SHIFT));

		ADVANCE_RING();
	} while (++box < ncliprects);
}
1543 
radeon_cp_discard_buffer(struct drm_device * dev,struct drm_buf * buf)1544 void radeon_cp_discard_buffer(struct drm_device *dev, struct drm_buf *buf)
1545 {
1546 	drm_radeon_private_t *dev_priv = dev->dev_private;
1547 	drm_radeon_buf_priv_t *buf_priv = buf->dev_private;
1548 	RING_LOCALS;
1549 
1550 	buf_priv->age = ++dev_priv->sarea_priv->last_dispatch;
1551 
1552 	/* Emit the vertex buffer age */
1553 	if ((dev_priv->flags & RADEON_FAMILY_MASK) >= CHIP_R600) {
1554 		BEGIN_RING(3);
1555 		R600_DISPATCH_AGE(buf_priv->age);
1556 		ADVANCE_RING();
1557 	} else {
1558 		BEGIN_RING(2);
1559 		RADEON_DISPATCH_AGE(buf_priv->age);
1560 		ADVANCE_RING();
1561 	}
1562 
1563 	buf->pending = 1;
1564 	buf->used = 0;
1565 }
1566 
radeon_cp_dispatch_indirect(struct drm_device * dev,struct drm_buf * buf,int start,int end)1567 static void radeon_cp_dispatch_indirect(struct drm_device * dev,
1568 					struct drm_buf * buf, int start, int end)
1569 {
1570 	drm_radeon_private_t *dev_priv = dev->dev_private;
1571 	RING_LOCALS;
1572 	DRM_DEBUG("buf=%d s=0x%x e=0x%x\n", buf->idx, start, end);
1573 
1574 	if (start != end) {
1575 		int offset = (dev_priv->gart_buffers_offset
1576 			      + buf->offset + start);
1577 		int dwords = (end - start + 3) / sizeof(u32);
1578 
1579 		/* Indirect buffer data must be an even number of
1580 		 * dwords, so if we've been given an odd number we must
1581 		 * pad the data with a Type-2 CP packet.
1582 		 */
1583 		if (dwords & 1) {
1584 			u32 *data = (u32 *)
1585 			    ((char *)dev->agp_buffer_map->virtual
1586 			     + buf->offset + start);
1587 			data[dwords++] = RADEON_CP_PACKET2;
1588 		}
1589 
1590 		/* Fire off the indirect buffer */
1591 		BEGIN_RING(3);
1592 
1593 		OUT_RING(CP_PACKET0(RADEON_CP_IB_BASE, 1));
1594 		OUT_RING(offset);
1595 		OUT_RING(dwords);
1596 
1597 		ADVANCE_RING();
1598 	}
1599 }
1600 
/*
 * Dispatch an indexed primitive.
 *
 * The five-dword draw packet header is written in place at prim->start
 * inside the element buffer, in front of the indices that begin
 * RADEON_INDEX_PRIM_OFFSET bytes later.  The range prim->start ..
 * prim->finish is then fired as an indirect buffer once per SAREA
 * cliprect (or exactly once when there are no cliprects).
 *
 * The primitive is rejected with an error message when the index count
 * fails bad_prim_vertex_nr(), when the range holds no indices, or when
 * prim->start is not 8-byte aligned.
 */
static void radeon_cp_dispatch_indices(struct drm_device *dev,
				       struct drm_buf * elt_buf,
				       drm_radeon_tcl_prim_t * prim)
{
	drm_radeon_private_t *dev_priv = dev->dev_private;
	drm_radeon_sarea_t *sarea_priv = dev_priv->sarea_priv;
	int ncliprects = sarea_priv->nbox;
	int vtx_offset = dev_priv->gart_buffers_offset + prim->offset;
	int idx_start = prim->start + RADEON_INDEX_PRIM_OFFSET;
	int nindices = (prim->finish - idx_start) / sizeof(u16);
	int pkt_dwords;
	int box;
	u32 *pkt;

	DRM_DEBUG("hwprim 0x%x vfmt 0x%x %d..%d offset: %x nr %d\n",
		  prim->prim,
		  prim->vc_format,
		  prim->start, prim->finish, prim->offset, prim->numverts);

	if (bad_prim_vertex_nr(prim->prim, nindices)) {
		DRM_ERROR("bad prim %x count %d\n", prim->prim, nindices);
		return;
	}

	if (idx_start >= prim->finish || (prim->start & 0x7)) {
		DRM_ERROR("buffer prim %d\n", prim->prim);
		return;
	}

	/* Whole packet (header + indices), rounded up to dwords. */
	pkt_dwords = (prim->finish - prim->start + 3) / sizeof(u32);

	pkt = (u32 *) ((char *)dev->agp_buffer_map->virtual +
		       elt_buf->offset + prim->start);

	pkt[0] = CP_PACKET3(RADEON_3D_RNDR_GEN_INDX_PRIM, pkt_dwords - 2);
	pkt[1] = vtx_offset;
	pkt[2] = prim->numverts;
	pkt[3] = prim->vc_format;
	pkt[4] = (prim->prim |
		  RADEON_PRIM_WALK_IND |
		  RADEON_COLOR_ORDER_RGBA |
		  RADEON_VTX_FMT_RADEON_MODE |
		  (nindices << RADEON_NUM_VERTICES_SHIFT));

	box = 0;
	do {
		if (box < ncliprects)
			radeon_emit_clip_rect(dev_priv,
					      &sarea_priv->boxes[box]);

		radeon_cp_dispatch_indirect(dev, elt_buf,
					    prim->start, prim->finish);
	} while (++box < ncliprects);
}
1656 
/* Upper bound, in bytes, on the texture data uploaded in one pass of
 * radeon_cp_dispatch_texture(); larger images are split into several
 * passes of at most this size.
 */
#define RADEON_MAX_TEXTURE_SIZE RADEON_BUFFER_SIZE
1658 
radeon_cp_dispatch_texture(struct drm_device * dev,struct drm_file * file_priv,drm_radeon_texture_t * tex,drm_radeon_tex_image_t * image)1659 static int radeon_cp_dispatch_texture(struct drm_device * dev,
1660 				      struct drm_file *file_priv,
1661 				      drm_radeon_texture_t * tex,
1662 				      drm_radeon_tex_image_t * image)
1663 {
1664 	drm_radeon_private_t *dev_priv = dev->dev_private;
1665 	struct drm_buf *buf;
1666 	u32 format;
1667 	u32 *buffer;
1668 	const u8 __user *data;
1669 	int size, dwords, tex_width, blit_width, spitch;
1670 	u32 height;
1671 	int i;
1672 	u32 texpitch, microtile;
1673 	u32 offset, byte_offset;
1674 	RING_LOCALS;
1675 
1676 	if (radeon_check_and_fixup_offset(dev_priv, file_priv, &tex->offset)) {
1677 		DRM_ERROR("Invalid destination offset\n");
1678 		return -EINVAL;
1679 	}
1680 
1681 	dev_priv->stats.boxes |= RADEON_BOX_TEXTURE_LOAD;
1682 
1683 	/* Flush the pixel cache.  This ensures no pixel data gets mixed
1684 	 * up with the texture data from the host data blit, otherwise
1685 	 * part of the texture image may be corrupted.
1686 	 */
1687 	BEGIN_RING(4);
1688 	RADEON_FLUSH_CACHE();
1689 	RADEON_WAIT_UNTIL_IDLE();
1690 	ADVANCE_RING();
1691 
1692 	/* The compiler won't optimize away a division by a variable,
1693 	 * even if the only legal values are powers of two.  Thus, we'll
1694 	 * use a shift instead.
1695 	 */
1696 	switch (tex->format) {
1697 	case RADEON_TXFORMAT_ARGB8888:
1698 	case RADEON_TXFORMAT_RGBA8888:
1699 		format = RADEON_COLOR_FORMAT_ARGB8888;
1700 		tex_width = tex->width * 4;
1701 		blit_width = image->width * 4;
1702 		break;
1703 	case RADEON_TXFORMAT_AI88:
1704 	case RADEON_TXFORMAT_ARGB1555:
1705 	case RADEON_TXFORMAT_RGB565:
1706 	case RADEON_TXFORMAT_ARGB4444:
1707 	case RADEON_TXFORMAT_VYUY422:
1708 	case RADEON_TXFORMAT_YVYU422:
1709 		format = RADEON_COLOR_FORMAT_RGB565;
1710 		tex_width = tex->width * 2;
1711 		blit_width = image->width * 2;
1712 		break;
1713 	case RADEON_TXFORMAT_I8:
1714 	case RADEON_TXFORMAT_RGB332:
1715 		format = RADEON_COLOR_FORMAT_CI8;
1716 		tex_width = tex->width * 1;
1717 		blit_width = image->width * 1;
1718 		break;
1719 	default:
1720 		DRM_ERROR("invalid texture format %d\n", tex->format);
1721 		return -EINVAL;
1722 	}
1723 	spitch = blit_width >> 6;
1724 	if (spitch == 0 && image->height > 1)
1725 		return -EINVAL;
1726 
1727 	texpitch = tex->pitch;
1728 	if ((texpitch << 22) & RADEON_DST_TILE_MICRO) {
1729 		microtile = 1;
1730 		if (tex_width < 64) {
1731 			texpitch &= ~(RADEON_DST_TILE_MICRO >> 22);
1732 			/* we got tiled coordinates, untile them */
1733 			image->x *= 2;
1734 		}
1735 	} else
1736 		microtile = 0;
1737 
1738 	/* this might fail for zero-sized uploads - are those illegal? */
1739 	if (!radeon_check_offset(dev_priv, tex->offset + image->height *
1740 				blit_width - 1)) {
1741 		DRM_ERROR("Invalid final destination offset\n");
1742 		return -EINVAL;
1743 	}
1744 
1745 	DRM_DEBUG("tex=%dx%d blit=%d\n", tex_width, tex->height, blit_width);
1746 
1747 	do {
1748 		DRM_DEBUG("tex: ofs=0x%x p=%d f=%d x=%d y=%d w=%d h=%d\n",
1749 			  tex->offset >> 10, tex->pitch, tex->format,
1750 			  image->x, image->y, image->width, image->height);
1751 
1752 		/* Make a copy of some parameters in case we have to
1753 		 * update them for a multi-pass texture blit.
1754 		 */
1755 		height = image->height;
1756 		data = (const u8 __user *)image->data;
1757 
1758 		size = height * blit_width;
1759 
1760 		if (size > RADEON_MAX_TEXTURE_SIZE) {
1761 			height = RADEON_MAX_TEXTURE_SIZE / blit_width;
1762 			size = height * blit_width;
1763 		} else if (size < 4 && size > 0) {
1764 			size = 4;
1765 		} else if (size == 0) {
1766 			return 0;
1767 		}
1768 
1769 		buf = radeon_freelist_get(dev);
1770 		if (0 && !buf) {
1771 			radeon_do_cp_idle(dev_priv);
1772 			buf = radeon_freelist_get(dev);
1773 		}
1774 		if (!buf) {
1775 			DRM_DEBUG("EAGAIN\n");
1776 			if (DRM_COPY_TO_USER(tex->image, image, sizeof(*image)))
1777 				return -EFAULT;
1778 			return -EAGAIN;
1779 		}
1780 
1781 		/* Dispatch the indirect buffer.
1782 		 */
1783 		buffer =
1784 		    (u32 *) ((char *)dev->agp_buffer_map->virtual + buf->offset);
1785 		dwords = size / 4;
1786 
1787 #define RADEON_COPY_MT(_buf, _data, _width) \
1788 	do { \
1789 		if (DRM_COPY_FROM_USER(_buf, _data, (_width))) {\
1790 			DRM_ERROR("EFAULT on pad, %d bytes\n", (_width)); \
1791 			return -EFAULT; \
1792 		} \
1793 	} while(0)
1794 
1795 		if (microtile) {
1796 			/* texture micro tiling in use, minimum texture width is thus 16 bytes.
1797 			   however, we cannot use blitter directly for texture width < 64 bytes,
1798 			   since minimum tex pitch is 64 bytes and we need this to match
1799 			   the texture width, otherwise the blitter will tile it wrong.
1800 			   Thus, tiling manually in this case. Additionally, need to special
1801 			   case tex height = 1, since our actual image will have height 2
1802 			   and we need to ensure we don't read beyond the texture size
1803 			   from user space. */
1804 			if (tex->height == 1) {
1805 				if (tex_width >= 64 || tex_width <= 16) {
1806 					RADEON_COPY_MT(buffer, data,
1807 						(int)(tex_width * sizeof(u32)));
1808 				} else if (tex_width == 32) {
1809 					RADEON_COPY_MT(buffer, data, 16);
1810 					RADEON_COPY_MT(buffer + 8,
1811 						       data + 16, 16);
1812 				}
1813 			} else if (tex_width >= 64 || tex_width == 16) {
1814 				RADEON_COPY_MT(buffer, data,
1815 					       (int)(dwords * sizeof(u32)));
1816 			} else if (tex_width < 16) {
1817 				for (i = 0; i < tex->height; i++) {
1818 					RADEON_COPY_MT(buffer, data, tex_width);
1819 					buffer += 4;
1820 					data += tex_width;
1821 				}
1822 			} else if (tex_width == 32) {
1823 				/* TODO: make sure this works when not fitting in one buffer
1824 				   (i.e. 32bytes x 2048...) */
1825 				for (i = 0; i < tex->height; i += 2) {
1826 					RADEON_COPY_MT(buffer, data, 16);
1827 					data += 16;
1828 					RADEON_COPY_MT(buffer + 8, data, 16);
1829 					data += 16;
1830 					RADEON_COPY_MT(buffer + 4, data, 16);
1831 					data += 16;
1832 					RADEON_COPY_MT(buffer + 12, data, 16);
1833 					data += 16;
1834 					buffer += 16;
1835 				}
1836 			}
1837 		} else {
1838 			if (tex_width >= 32) {
1839 				/* Texture image width is larger than the minimum, so we
1840 				 * can upload it directly.
1841 				 */
1842 				RADEON_COPY_MT(buffer, data,
1843 					       (int)(dwords * sizeof(u32)));
1844 			} else {
1845 				/* Texture image width is less than the minimum, so we
1846 				 * need to pad out each image scanline to the minimum
1847 				 * width.
1848 				 */
1849 				for (i = 0; i < tex->height; i++) {
1850 					RADEON_COPY_MT(buffer, data, tex_width);
1851 					buffer += 8;
1852 					data += tex_width;
1853 				}
1854 			}
1855 		}
1856 
1857 #undef RADEON_COPY_MT
1858 		byte_offset = (image->y & ~2047) * blit_width;
1859 		buf->file_priv = file_priv;
1860 		buf->used = size;
1861 		offset = dev_priv->gart_buffers_offset + buf->offset;
1862 		BEGIN_RING(9);
1863 		OUT_RING(CP_PACKET3(RADEON_CNTL_BITBLT_MULTI, 5));
1864 		OUT_RING(RADEON_GMC_SRC_PITCH_OFFSET_CNTL |
1865 			 RADEON_GMC_DST_PITCH_OFFSET_CNTL |
1866 			 RADEON_GMC_BRUSH_NONE |
1867 			 (format << 8) |
1868 			 RADEON_GMC_SRC_DATATYPE_COLOR |
1869 			 RADEON_ROP3_S |
1870 			 RADEON_DP_SRC_SOURCE_MEMORY |
1871 			 RADEON_GMC_CLR_CMP_CNTL_DIS | RADEON_GMC_WR_MSK_DIS);
1872 		OUT_RING((spitch << 22) | (offset >> 10));
1873 		OUT_RING((texpitch << 22) | ((tex->offset >> 10) + (byte_offset >> 10)));
1874 		OUT_RING(0);
1875 		OUT_RING((image->x << 16) | (image->y % 2048));
1876 		OUT_RING((image->width << 16) | height);
1877 		RADEON_WAIT_UNTIL_2D_IDLE();
1878 		ADVANCE_RING();
1879 		COMMIT_RING();
1880 
1881 		radeon_cp_discard_buffer(dev, buf);
1882 
1883 		/* Update the input parameters for next time */
1884 		image->y += height;
1885 		image->height -= height;
1886 		image->data = (const u8 __user *)image->data + size;
1887 	} while (image->height > 0);
1888 
1889 	/* Flush the pixel cache after the blit completes.  This ensures
1890 	 * the texture data is written out to memory before rendering
1891 	 * continues.
1892 	 */
1893 	BEGIN_RING(4);
1894 	RADEON_FLUSH_CACHE();
1895 	RADEON_WAIT_UNTIL_2D_IDLE();
1896 	ADVANCE_RING();
1897 	COMMIT_RING();
1898 
1899 	return 0;
1900 }
1901 
/* Queue a stipple pattern on the CP ring.
 *
 * Writes 0 to RADEON_RE_STIPPLE_ADDR and then streams the 32 dwords at
 * `stipple` into RADEON_RE_STIPPLE_DATA.  The ring is advanced here but
 * not committed.
 */
static void radeon_cp_dispatch_stipple(struct drm_device * dev, u32 * stipple)
{
	drm_radeon_private_t *dev_priv = dev->dev_private;
	int word;
	RING_LOCALS;

	DRM_DEBUG("\n");

	/* 2 dwords for the address write + 1 table header + 32 data dwords. */
	BEGIN_RING(35);

	OUT_RING(CP_PACKET0(RADEON_RE_STIPPLE_ADDR, 0));
	OUT_RING(0x00000000);

	OUT_RING(CP_PACKET0_TABLE(RADEON_RE_STIPPLE_DATA, 31));
	word = 0;
	while (word < 32) {
		OUT_RING(stipple[word]);
		word++;
	}

	ADVANCE_RING();
}
1921 
/* Write the cached flags and bounds of dev_priv->surfaces[surf_index] to
 * the matching hardware surface registers.
 *
 * Nothing is done when dev_priv->mmio is not set.  Otherwise
 * radeon_do_cp_idle() is called before the registers are written.
 */
static void radeon_apply_surface_regs(int surf_index,
				      drm_radeon_private_t *dev_priv)
{
	/* Consecutive surfaces' registers are spaced 16 bytes apart. */
	const int reg_offset = 16 * surf_index;

	if (dev_priv->mmio) {
		radeon_do_cp_idle(dev_priv);

		RADEON_WRITE(RADEON_SURFACE0_INFO + reg_offset,
			     dev_priv->surfaces[surf_index].flags);
		RADEON_WRITE(RADEON_SURFACE0_LOWER_BOUND + reg_offset,
			     dev_priv->surfaces[surf_index].lower);
		RADEON_WRITE(RADEON_SURFACE0_UPPER_BOUND + reg_offset,
			     dev_priv->surfaces[surf_index].upper);
	}
}
1937 
1938 /* Allocates a virtual surface
1939  * doesn't always allocate a real surface, will stretch an existing
1940  * surface when possible.
1941  *
1942  * Note that refcount can be at most 2, since during a free refcount=3
1943  * might mean we have to allocate a new surface which might not always
1944  * be available.
1945  * For example : we allocate three contigous surfaces ABC. If B is
1946  * freed, we suddenly need two surfaces to store A and C, which might
1947  * not always be available.
1948  */
alloc_surface(drm_radeon_surface_alloc_t * new,drm_radeon_private_t * dev_priv,struct drm_file * file_priv)1949 static int alloc_surface(drm_radeon_surface_alloc_t *new,
1950 			 drm_radeon_private_t *dev_priv,
1951 			 struct drm_file *file_priv)
1952 {
1953 	struct radeon_virt_surface *s;
1954 	int i;
1955 	int virt_surface_index;
1956 	uint32_t new_upper, new_lower;
1957 
1958 	new_lower = new->address;
1959 	new_upper = new_lower + new->size - 1;
1960 
1961 	/* sanity check */
1962 	if ((new_lower >= new_upper) || (new->flags == 0) || (new->size == 0) ||
1963 	    ((new_upper & RADEON_SURF_ADDRESS_FIXED_MASK) !=
1964 	     RADEON_SURF_ADDRESS_FIXED_MASK)
1965 	    || ((new_lower & RADEON_SURF_ADDRESS_FIXED_MASK) != 0))
1966 		return -1;
1967 
1968 	/* make sure there is no overlap with existing surfaces */
1969 	for (i = 0; i < RADEON_MAX_SURFACES; i++) {
1970 		if ((dev_priv->surfaces[i].refcount != 0) &&
1971 		    (((new_lower >= dev_priv->surfaces[i].lower) &&
1972 		      (new_lower < dev_priv->surfaces[i].upper)) ||
1973 		     ((new_lower < dev_priv->surfaces[i].lower) &&
1974 		      (new_upper > dev_priv->surfaces[i].lower)))) {
1975 			return -1;
1976 		}
1977 	}
1978 
1979 	/* find a virtual surface */
1980 	for (i = 0; i < 2 * RADEON_MAX_SURFACES; i++)
1981 		if (dev_priv->virt_surfaces[i].file_priv == 0)
1982 			break;
1983 	if (i == 2 * RADEON_MAX_SURFACES) {
1984 		return -1;
1985 	}
1986 	virt_surface_index = i;
1987 
1988 	/* try to reuse an existing surface */
1989 	for (i = 0; i < RADEON_MAX_SURFACES; i++) {
1990 		/* extend before */
1991 		if ((dev_priv->surfaces[i].refcount == 1) &&
1992 		    (new->flags == dev_priv->surfaces[i].flags) &&
1993 		    (new_upper + 1 == dev_priv->surfaces[i].lower)) {
1994 			s = &(dev_priv->virt_surfaces[virt_surface_index]);
1995 			s->surface_index = i;
1996 			s->lower = new_lower;
1997 			s->upper = new_upper;
1998 			s->flags = new->flags;
1999 			s->file_priv = file_priv;
2000 			dev_priv->surfaces[i].refcount++;
2001 			dev_priv->surfaces[i].lower = s->lower;
2002 			radeon_apply_surface_regs(s->surface_index, dev_priv);
2003 			return virt_surface_index;
2004 		}
2005 
2006 		/* extend after */
2007 		if ((dev_priv->surfaces[i].refcount == 1) &&
2008 		    (new->flags == dev_priv->surfaces[i].flags) &&
2009 		    (new_lower == dev_priv->surfaces[i].upper + 1)) {
2010 			s = &(dev_priv->virt_surfaces[virt_surface_index]);
2011 			s->surface_index = i;
2012 			s->lower = new_lower;
2013 			s->upper = new_upper;
2014 			s->flags = new->flags;
2015 			s->file_priv = file_priv;
2016 			dev_priv->surfaces[i].refcount++;
2017 			dev_priv->surfaces[i].upper = s->upper;
2018 			radeon_apply_surface_regs(s->surface_index, dev_priv);
2019 			return virt_surface_index;
2020 		}
2021 	}
2022 
2023 	/* okay, we need a new one */
2024 	for (i = 0; i < RADEON_MAX_SURFACES; i++) {
2025 		if (dev_priv->surfaces[i].refcount == 0) {
2026 			s = &(dev_priv->virt_surfaces[virt_surface_index]);
2027 			s->surface_index = i;
2028 			s->lower = new_lower;
2029 			s->upper = new_upper;
2030 			s->flags = new->flags;
2031 			s->file_priv = file_priv;
2032 			dev_priv->surfaces[i].refcount = 1;
2033 			dev_priv->surfaces[i].lower = s->lower;
2034 			dev_priv->surfaces[i].upper = s->upper;
2035 			dev_priv->surfaces[i].flags = s->flags;
2036 			radeon_apply_surface_regs(s->surface_index, dev_priv);
2037 			return virt_surface_index;
2038 		}
2039 	}
2040 
2041 	/* we didn't find anything */
2042 	return -1;
2043 }
2044 
/* Release the virtual surface owned by `file_priv` whose lower bound
 * equals `lower`.
 *
 * On the backing real surface: a lower bound equal to the virtual
 * surface's lower bound is replaced by the virtual surface's upper bound,
 * an upper bound equal to the virtual surface's upper bound is replaced
 * by its lower bound, the refcount is decremented and, once it reaches
 * zero, the flags are cleared.  The surface registers are then rewritten.
 *
 * Returns 0 when a surface was released, 1 when none matched.
 */
static int free_surface(struct drm_file *file_priv,
			drm_radeon_private_t * dev_priv,
			int lower)
{
	int i;

	for (i = 0; i < 2 * RADEON_MAX_SURFACES; i++) {
		struct radeon_virt_surface *virt = &dev_priv->virt_surfaces[i];
		int real;

		/* Skip unowned slots and slots that are not the one asked for. */
		if (!virt->file_priv)
			continue;
		if (lower != virt->lower || file_priv != virt->file_priv)
			continue;

		real = virt->surface_index;

		if (dev_priv->surfaces[real].lower == virt->lower)
			dev_priv->surfaces[real].lower = virt->upper;

		if (dev_priv->surfaces[real].upper == virt->upper)
			dev_priv->surfaces[real].upper = virt->lower;

		dev_priv->surfaces[real].refcount--;
		if (dev_priv->surfaces[real].refcount == 0)
			dev_priv->surfaces[real].flags = 0;

		virt->file_priv = NULL;
		radeon_apply_surface_regs(real, dev_priv);
		return 0;
	}

	return 1;
}
2081 
/* Free every virtual surface that is currently owned by `file_priv`. */
static void radeon_surfaces_release(struct drm_file *file_priv,
				    drm_radeon_private_t * dev_priv)
{
	int slot = 0;

	while (slot < 2 * RADEON_MAX_SURFACES) {
		if (dev_priv->virt_surfaces[slot].file_priv == file_priv) {
			/* free_surface() looks the surface up by owner and
			 * lower bound.
			 */
			free_surface(file_priv, dev_priv,
				     dev_priv->virt_surfaces[slot].lower);
		}
		slot++;
	}
}
2092 
2093 /* ================================================================
2094  * IOCTL functions
2095  */
radeon_surface_alloc(struct drm_device * dev,void * data,struct drm_file * file_priv)2096 static int radeon_surface_alloc(struct drm_device *dev, void *data, struct drm_file *file_priv)
2097 {
2098 	drm_radeon_private_t *dev_priv = dev->dev_private;
2099 	drm_radeon_surface_alloc_t *alloc = data;
2100 
2101 	if (alloc_surface(alloc, dev_priv, file_priv) == -1)
2102 		return -EINVAL;
2103 	else
2104 		return 0;
2105 }
2106 
radeon_surface_free(struct drm_device * dev,void * data,struct drm_file * file_priv)2107 static int radeon_surface_free(struct drm_device *dev, void *data, struct drm_file *file_priv)
2108 {
2109 	drm_radeon_private_t *dev_priv = dev->dev_private;
2110 	drm_radeon_surface_free_t *memfree = data;
2111 
2112 	if (free_surface(file_priv, dev_priv, memfree->address))
2113 		return -EINVAL;
2114 	else
2115 		return 0;
2116 }
2117 
radeon_cp_clear(struct drm_device * dev,void * data,struct drm_file * file_priv)2118 static int radeon_cp_clear(struct drm_device *dev, void *data, struct drm_file *file_priv)
2119 {
2120 	drm_radeon_private_t *dev_priv = dev->dev_private;
2121 	drm_radeon_sarea_t *sarea_priv = dev_priv->sarea_priv;
2122 	drm_radeon_clear_t *clear = data;
2123 	drm_radeon_clear_rect_t depth_boxes[RADEON_NR_SAREA_CLIPRECTS];
2124 	DRM_DEBUG("\n");
2125 
2126 	LOCK_TEST_WITH_RETURN(dev, file_priv);
2127 
2128 	RING_SPACE_TEST_WITH_RETURN(dev_priv);
2129 
2130 	if (sarea_priv->nbox > RADEON_NR_SAREA_CLIPRECTS)
2131 		sarea_priv->nbox = RADEON_NR_SAREA_CLIPRECTS;
2132 
2133 	if (DRM_COPY_FROM_USER(&depth_boxes, clear->depth_boxes,
2134 			       sarea_priv->nbox * sizeof(depth_boxes[0])))
2135 		return -EFAULT;
2136 
2137 	radeon_cp_dispatch_clear(dev, clear, depth_boxes);
2138 
2139 	COMMIT_RING();
2140 	return 0;
2141 }
2142 
2143 /* Not sure why this isn't set all the time:
2144  */
radeon_do_init_pageflip(struct drm_device * dev)2145 static int radeon_do_init_pageflip(struct drm_device *dev)
2146 {
2147 	drm_radeon_private_t *dev_priv = dev->dev_private;
2148 	RING_LOCALS;
2149 
2150 	DRM_DEBUG("\n");
2151 
2152 	BEGIN_RING(6);
2153 	RADEON_WAIT_UNTIL_3D_IDLE();
2154 	OUT_RING(CP_PACKET0(RADEON_CRTC_OFFSET_CNTL, 0));
2155 	OUT_RING(RADEON_READ(RADEON_CRTC_OFFSET_CNTL) |
2156 		 RADEON_CRTC_OFFSET_FLIP_CNTL);
2157 	OUT_RING(CP_PACKET0(RADEON_CRTC2_OFFSET_CNTL, 0));
2158 	OUT_RING(RADEON_READ(RADEON_CRTC2_OFFSET_CNTL) |
2159 		 RADEON_CRTC_OFFSET_FLIP_CNTL);
2160 	ADVANCE_RING();
2161 
2162 	dev_priv->page_flipping = 1;
2163 
2164 	if (dev_priv->sarea_priv->pfCurrentPage != 1)
2165 		dev_priv->sarea_priv->pfCurrentPage = 0;
2166 
2167 	return 0;
2168 }
2169 
2170 /* Swapping and flipping are different operations, need different ioctls.
2171  * They can & should be intermixed to support multiple 3d windows.
2172  */
radeon_cp_flip(struct drm_device * dev,void * data,struct drm_file * file_priv)2173 static int radeon_cp_flip(struct drm_device *dev, void *data, struct drm_file *file_priv)
2174 {
2175 	drm_radeon_private_t *dev_priv = dev->dev_private;
2176 	DRM_DEBUG("\n");
2177 
2178 	LOCK_TEST_WITH_RETURN(dev, file_priv);
2179 
2180 	RING_SPACE_TEST_WITH_RETURN(dev_priv);
2181 
2182 	if (!dev_priv->page_flipping)
2183 		radeon_do_init_pageflip(dev);
2184 
2185 	radeon_cp_dispatch_flip(dev);
2186 
2187 	COMMIT_RING();
2188 	return 0;
2189 }
2190 
radeon_cp_swap(struct drm_device * dev,void * data,struct drm_file * file_priv)2191 static int radeon_cp_swap(struct drm_device *dev, void *data, struct drm_file *file_priv)
2192 {
2193 	drm_radeon_private_t *dev_priv = dev->dev_private;
2194 	drm_radeon_sarea_t *sarea_priv = dev_priv->sarea_priv;
2195 
2196 	DRM_DEBUG("\n");
2197 
2198 	LOCK_TEST_WITH_RETURN(dev, file_priv);
2199 
2200 	RING_SPACE_TEST_WITH_RETURN(dev_priv);
2201 
2202 	if (sarea_priv->nbox > RADEON_NR_SAREA_CLIPRECTS)
2203 		sarea_priv->nbox = RADEON_NR_SAREA_CLIPRECTS;
2204 
2205 	if ((dev_priv->flags & RADEON_FAMILY_MASK) >= CHIP_R600)
2206 		r600_cp_dispatch_swap(dev);
2207 	else
2208 		radeon_cp_dispatch_swap(dev);
2209 	sarea_priv->ctx_owner = 0;
2210 
2211 	COMMIT_RING();
2212 	return 0;
2213 }
2214 
radeon_cp_vertex(struct drm_device * dev,void * data,struct drm_file * file_priv)2215 static int radeon_cp_vertex(struct drm_device *dev, void *data, struct drm_file *file_priv)
2216 {
2217 	drm_radeon_private_t *dev_priv = dev->dev_private;
2218 	drm_radeon_sarea_t *sarea_priv;
2219 	struct drm_device_dma *dma = dev->dma;
2220 	struct drm_buf *buf;
2221 	drm_radeon_vertex_t *vertex = data;
2222 	drm_radeon_tcl_prim_t prim;
2223 
2224 	LOCK_TEST_WITH_RETURN(dev, file_priv);
2225 
2226 	sarea_priv = dev_priv->sarea_priv;
2227 
2228 	DRM_DEBUG("pid=%d index=%d count=%d discard=%d\n",
2229 		  DRM_CURRENTPID, vertex->idx, vertex->count, vertex->discard);
2230 
2231 	if (vertex->idx < 0 || vertex->idx >= dma->buf_count) {
2232 		DRM_ERROR("buffer index %d (of %d max)\n",
2233 			  vertex->idx, dma->buf_count - 1);
2234 		return -EINVAL;
2235 	}
2236 	if (vertex->prim < 0 || vertex->prim > RADEON_PRIM_TYPE_3VRT_LINE_LIST) {
2237 		DRM_ERROR("buffer prim %d\n", vertex->prim);
2238 		return -EINVAL;
2239 	}
2240 
2241 	RING_SPACE_TEST_WITH_RETURN(dev_priv);
2242 	VB_AGE_TEST_WITH_RETURN(dev_priv);
2243 
2244 	buf = dma->buflist[vertex->idx];
2245 
2246 	if (buf->file_priv != file_priv) {
2247 		DRM_ERROR("process %d using buffer owned by %p\n",
2248 			  DRM_CURRENTPID, buf->file_priv);
2249 		return -EINVAL;
2250 	}
2251 	if (buf->pending) {
2252 		DRM_ERROR("sending pending buffer %d\n", vertex->idx);
2253 		return -EINVAL;
2254 	}
2255 
2256 	/* Build up a prim_t record:
2257 	 */
2258 	if (vertex->count) {
2259 		buf->used = vertex->count;	/* not used? */
2260 
2261 		if (sarea_priv->dirty & ~RADEON_UPLOAD_CLIPRECTS) {
2262 			if (radeon_emit_state(dev_priv, file_priv,
2263 					      &sarea_priv->context_state,
2264 					      sarea_priv->tex_state,
2265 					      sarea_priv->dirty)) {
2266 				DRM_ERROR("radeon_emit_state failed\n");
2267 				return -EINVAL;
2268 			}
2269 
2270 			sarea_priv->dirty &= ~(RADEON_UPLOAD_TEX0IMAGES |
2271 					       RADEON_UPLOAD_TEX1IMAGES |
2272 					       RADEON_UPLOAD_TEX2IMAGES |
2273 					       RADEON_REQUIRE_QUIESCENCE);
2274 		}
2275 
2276 		prim.start = 0;
2277 		prim.finish = vertex->count;	/* unused */
2278 		prim.prim = vertex->prim;
2279 		prim.numverts = vertex->count;
2280 		prim.vc_format = sarea_priv->vc_format;
2281 
2282 		radeon_cp_dispatch_vertex(dev, buf, &prim);
2283 	}
2284 
2285 	if (vertex->discard) {
2286 		radeon_cp_discard_buffer(dev, buf);
2287 	}
2288 
2289 	COMMIT_RING();
2290 	return 0;
2291 }
2292 
radeon_cp_indices(struct drm_device * dev,void * data,struct drm_file * file_priv)2293 static int radeon_cp_indices(struct drm_device *dev, void *data, struct drm_file *file_priv)
2294 {
2295 	drm_radeon_private_t *dev_priv = dev->dev_private;
2296 	drm_radeon_sarea_t *sarea_priv;
2297 	struct drm_device_dma *dma = dev->dma;
2298 	struct drm_buf *buf;
2299 	drm_radeon_indices_t *elts = data;
2300 	drm_radeon_tcl_prim_t prim;
2301 	int count;
2302 
2303 	LOCK_TEST_WITH_RETURN(dev, file_priv);
2304 
2305 	sarea_priv = dev_priv->sarea_priv;
2306 
2307 	DRM_DEBUG("pid=%d index=%d start=%d end=%d discard=%d\n",
2308 		  DRM_CURRENTPID, elts->idx, elts->start, elts->end,
2309 		  elts->discard);
2310 
2311 	if (elts->idx < 0 || elts->idx >= dma->buf_count) {
2312 		DRM_ERROR("buffer index %d (of %d max)\n",
2313 			  elts->idx, dma->buf_count - 1);
2314 		return -EINVAL;
2315 	}
2316 	if (elts->prim < 0 || elts->prim > RADEON_PRIM_TYPE_3VRT_LINE_LIST) {
2317 		DRM_ERROR("buffer prim %d\n", elts->prim);
2318 		return -EINVAL;
2319 	}
2320 
2321 	RING_SPACE_TEST_WITH_RETURN(dev_priv);
2322 	VB_AGE_TEST_WITH_RETURN(dev_priv);
2323 
2324 	buf = dma->buflist[elts->idx];
2325 
2326 	if (buf->file_priv != file_priv) {
2327 		DRM_ERROR("process %d using buffer owned by %p\n",
2328 			  DRM_CURRENTPID, buf->file_priv);
2329 		return -EINVAL;
2330 	}
2331 	if (buf->pending) {
2332 		DRM_ERROR("sending pending buffer %d\n", elts->idx);
2333 		return -EINVAL;
2334 	}
2335 
2336 	count = (elts->end - elts->start) / sizeof(u16);
2337 	elts->start -= RADEON_INDEX_PRIM_OFFSET;
2338 
2339 	if (elts->start & 0x7) {
2340 		DRM_ERROR("misaligned buffer 0x%x\n", elts->start);
2341 		return -EINVAL;
2342 	}
2343 	if (elts->start < buf->used) {
2344 		DRM_ERROR("no header 0x%x - 0x%x\n", elts->start, buf->used);
2345 		return -EINVAL;
2346 	}
2347 
2348 	buf->used = elts->end;
2349 
2350 	if (sarea_priv->dirty & ~RADEON_UPLOAD_CLIPRECTS) {
2351 		if (radeon_emit_state(dev_priv, file_priv,
2352 				      &sarea_priv->context_state,
2353 				      sarea_priv->tex_state,
2354 				      sarea_priv->dirty)) {
2355 			DRM_ERROR("radeon_emit_state failed\n");
2356 			return -EINVAL;
2357 		}
2358 
2359 		sarea_priv->dirty &= ~(RADEON_UPLOAD_TEX0IMAGES |
2360 				       RADEON_UPLOAD_TEX1IMAGES |
2361 				       RADEON_UPLOAD_TEX2IMAGES |
2362 				       RADEON_REQUIRE_QUIESCENCE);
2363 	}
2364 
2365 	/* Build up a prim_t record:
2366 	 */
2367 	prim.start = elts->start;
2368 	prim.finish = elts->end;
2369 	prim.prim = elts->prim;
2370 	prim.offset = 0;	/* offset from start of dma buffers */
2371 	prim.numverts = RADEON_MAX_VB_VERTS;	/* duh */
2372 	prim.vc_format = sarea_priv->vc_format;
2373 
2374 	radeon_cp_dispatch_indices(dev, buf, &prim);
2375 	if (elts->discard) {
2376 		radeon_cp_discard_buffer(dev, buf);
2377 	}
2378 
2379 	COMMIT_RING();
2380 	return 0;
2381 }
2382 
radeon_cp_texture(struct drm_device * dev,void * data,struct drm_file * file_priv)2383 static int radeon_cp_texture(struct drm_device *dev, void *data, struct drm_file *file_priv)
2384 {
2385 	drm_radeon_private_t *dev_priv = dev->dev_private;
2386 	drm_radeon_texture_t *tex = data;
2387 	drm_radeon_tex_image_t image;
2388 	int ret;
2389 
2390 	LOCK_TEST_WITH_RETURN(dev, file_priv);
2391 
2392 	if (tex->image == NULL) {
2393 		DRM_ERROR("null texture image!\n");
2394 		return -EINVAL;
2395 	}
2396 
2397 	if (DRM_COPY_FROM_USER(&image,
2398 			       (drm_radeon_tex_image_t __user *) tex->image,
2399 			       sizeof(image)))
2400 		return -EFAULT;
2401 
2402 	RING_SPACE_TEST_WITH_RETURN(dev_priv);
2403 	VB_AGE_TEST_WITH_RETURN(dev_priv);
2404 
2405 	if ((dev_priv->flags & RADEON_FAMILY_MASK) >= CHIP_R600)
2406 		ret = r600_cp_dispatch_texture(dev, file_priv, tex, &image);
2407 	else
2408 		ret = radeon_cp_dispatch_texture(dev, file_priv, tex, &image);
2409 
2410 	return ret;
2411 }
2412 
radeon_cp_stipple(struct drm_device * dev,void * data,struct drm_file * file_priv)2413 static int radeon_cp_stipple(struct drm_device *dev, void *data, struct drm_file *file_priv)
2414 {
2415 	drm_radeon_private_t *dev_priv = dev->dev_private;
2416 	drm_radeon_stipple_t *stipple = data;
2417 	u32 mask[32];
2418 
2419 	LOCK_TEST_WITH_RETURN(dev, file_priv);
2420 
2421 	if (DRM_COPY_FROM_USER(&mask, stipple->mask, 32 * sizeof(u32)))
2422 		return -EFAULT;
2423 
2424 	RING_SPACE_TEST_WITH_RETURN(dev_priv);
2425 
2426 	radeon_cp_dispatch_stipple(dev, mask);
2427 
2428 	COMMIT_RING();
2429 	return 0;
2430 }
2431 
radeon_cp_indirect(struct drm_device * dev,void * data,struct drm_file * file_priv)2432 static int radeon_cp_indirect(struct drm_device *dev, void *data, struct drm_file *file_priv)
2433 {
2434 	drm_radeon_private_t *dev_priv = dev->dev_private;
2435 	struct drm_device_dma *dma = dev->dma;
2436 	struct drm_buf *buf;
2437 	drm_radeon_indirect_t *indirect = data;
2438 	RING_LOCALS;
2439 
2440 	LOCK_TEST_WITH_RETURN(dev, file_priv);
2441 
2442 	if (!dev_priv) {
2443 		DRM_ERROR("called with no initialization\n");
2444 		return -EINVAL;
2445 	}
2446 
2447 	DRM_DEBUG("idx=%d s=%d e=%d d=%d\n",
2448 		  indirect->idx, indirect->start, indirect->end,
2449 		  indirect->discard);
2450 
2451 	if (indirect->idx < 0 || indirect->idx >= dma->buf_count) {
2452 		DRM_ERROR("buffer index %d (of %d max)\n",
2453 			  indirect->idx, dma->buf_count - 1);
2454 		return -EINVAL;
2455 	}
2456 
2457 	buf = dma->buflist[indirect->idx];
2458 
2459 	if (buf->file_priv != file_priv) {
2460 		DRM_ERROR("process %d using buffer owned by %p\n",
2461 			  DRM_CURRENTPID, buf->file_priv);
2462 		return -EINVAL;
2463 	}
2464 	if (buf->pending) {
2465 		DRM_ERROR("sending pending buffer %d\n", indirect->idx);
2466 		return -EINVAL;
2467 	}
2468 
2469 	if (indirect->start < buf->used) {
2470 		DRM_ERROR("reusing indirect: start=0x%x actual=0x%x\n",
2471 			  indirect->start, buf->used);
2472 		return -EINVAL;
2473 	}
2474 
2475 	RING_SPACE_TEST_WITH_RETURN(dev_priv);
2476 	VB_AGE_TEST_WITH_RETURN(dev_priv);
2477 
2478 	buf->used = indirect->end;
2479 
2480 	/* Dispatch the indirect buffer full of commands from the
2481 	 * X server.  This is insecure and is thus only available to
2482 	 * privileged clients.
2483 	 */
2484 	if ((dev_priv->flags & RADEON_FAMILY_MASK) >= CHIP_R600)
2485 		r600_cp_dispatch_indirect(dev, buf, indirect->start, indirect->end);
2486 	else {
2487 		/* Wait for the 3D stream to idle before the indirect buffer
2488 		 * containing 2D acceleration commands is processed.
2489 		 */
2490 		BEGIN_RING(2);
2491 		RADEON_WAIT_UNTIL_3D_IDLE();
2492 		ADVANCE_RING();
2493 		radeon_cp_dispatch_indirect(dev, buf, indirect->start, indirect->end);
2494 	}
2495 
2496 	if (indirect->discard)
2497 		radeon_cp_discard_buffer(dev, buf);
2498 
2499 	COMMIT_RING();
2500 	return 0;
2501 }
2502 
radeon_cp_vertex2(struct drm_device * dev,void * data,struct drm_file * file_priv)2503 static int radeon_cp_vertex2(struct drm_device *dev, void *data, struct drm_file *file_priv)
2504 {
2505 	drm_radeon_private_t *dev_priv = dev->dev_private;
2506 	drm_radeon_sarea_t *sarea_priv;
2507 	struct drm_device_dma *dma = dev->dma;
2508 	struct drm_buf *buf;
2509 	drm_radeon_vertex2_t *vertex = data;
2510 	int i;
2511 	unsigned char laststate;
2512 
2513 	LOCK_TEST_WITH_RETURN(dev, file_priv);
2514 
2515 	sarea_priv = dev_priv->sarea_priv;
2516 
2517 	DRM_DEBUG("pid=%d index=%d discard=%d\n",
2518 		  DRM_CURRENTPID, vertex->idx, vertex->discard);
2519 
2520 	if (vertex->idx < 0 || vertex->idx >= dma->buf_count) {
2521 		DRM_ERROR("buffer index %d (of %d max)\n",
2522 			  vertex->idx, dma->buf_count - 1);
2523 		return -EINVAL;
2524 	}
2525 
2526 	RING_SPACE_TEST_WITH_RETURN(dev_priv);
2527 	VB_AGE_TEST_WITH_RETURN(dev_priv);
2528 
2529 	buf = dma->buflist[vertex->idx];
2530 
2531 	if (buf->file_priv != file_priv) {
2532 		DRM_ERROR("process %d using buffer owned by %p\n",
2533 			  DRM_CURRENTPID, buf->file_priv);
2534 		return -EINVAL;
2535 	}
2536 
2537 	if (buf->pending) {
2538 		DRM_ERROR("sending pending buffer %d\n", vertex->idx);
2539 		return -EINVAL;
2540 	}
2541 
2542 	if (sarea_priv->nbox > RADEON_NR_SAREA_CLIPRECTS)
2543 		return -EINVAL;
2544 
2545 	for (laststate = 0xff, i = 0; i < vertex->nr_prims; i++) {
2546 		drm_radeon_prim_t prim;
2547 		drm_radeon_tcl_prim_t tclprim;
2548 
2549 		if (DRM_COPY_FROM_USER(&prim, &vertex->prim[i], sizeof(prim)))
2550 			return -EFAULT;
2551 
2552 		if (prim.stateidx != laststate) {
2553 			drm_radeon_state_t state;
2554 
2555 			if (DRM_COPY_FROM_USER(&state,
2556 					       &vertex->state[prim.stateidx],
2557 					       sizeof(state)))
2558 				return -EFAULT;
2559 
2560 			if (radeon_emit_state2(dev_priv, file_priv, &state)) {
2561 				DRM_ERROR("radeon_emit_state2 failed\n");
2562 				return -EINVAL;
2563 			}
2564 
2565 			laststate = prim.stateidx;
2566 		}
2567 
2568 		tclprim.start = prim.start;
2569 		tclprim.finish = prim.finish;
2570 		tclprim.prim = prim.prim;
2571 		tclprim.vc_format = prim.vc_format;
2572 
2573 		if (prim.prim & RADEON_PRIM_WALK_IND) {
2574 			tclprim.offset = prim.numverts * 64;
2575 			tclprim.numverts = RADEON_MAX_VB_VERTS;	/* duh */
2576 
2577 			radeon_cp_dispatch_indices(dev, buf, &tclprim);
2578 		} else {
2579 			tclprim.numverts = prim.numverts;
2580 			tclprim.offset = 0;	/* not used */
2581 
2582 			radeon_cp_dispatch_vertex(dev, buf, &tclprim);
2583 		}
2584 
2585 		if (sarea_priv->nbox == 1)
2586 			sarea_priv->nbox = 0;
2587 	}
2588 
2589 	if (vertex->discard) {
2590 		radeon_cp_discard_buffer(dev, buf);
2591 	}
2592 
2593 	COMMIT_RING();
2594 	return 0;
2595 }
2596 
/* Emit one state packet from the command buffer.
 *
 * header.packet.packet_id selects an entry of packet[], which supplies
 * the dword count and the first register.  The payload at cmdbuf->buf is
 * verified/fixed up, written to the ring as a single register write, and
 * cmdbuf is advanced past it.
 *
 * Returns 0 on success, -EINVAL for an out-of-range id, a payload larger
 * than the remaining buffer, or a failed verification.
 */
static int radeon_emit_packets(drm_radeon_private_t * dev_priv,
			       struct drm_file *file_priv,
			       drm_radeon_cmd_header_t header,
			       drm_radeon_kcmd_buffer_t *cmdbuf)
{
	const int id = (int)header.packet.packet_id;
	int *payload = (int *)cmdbuf->buf;
	int ndwords;
	int first_reg;
	RING_LOCALS;

	if (id >= RADEON_MAX_STATE_PACKETS)
		return -EINVAL;

	ndwords = packet[id].len;
	first_reg = packet[id].start;

	if (ndwords * sizeof(int) > cmdbuf->bufsz) {
		DRM_ERROR("Packet size provided larger than data provided\n");
		return -EINVAL;
	}

	if (radeon_check_and_fixup_packets(dev_priv, file_priv, id, payload)) {
		DRM_ERROR("Packet verification failed\n");
		return -EINVAL;
	}

	/* One header dword followed by the payload. */
	BEGIN_RING(ndwords + 1);
	OUT_RING(CP_PACKET0(first_reg, (ndwords - 1)));
	OUT_RING_TABLE(payload, ndwords);
	ADVANCE_RING();

	cmdbuf->buf += ndwords * sizeof(int);
	cmdbuf->bufsz -= ndwords * sizeof(int);
	return 0;
}
2632 
/* Emit scalar data from the command buffer.
 *
 * header.scalars supplies the dword count, the start offset and the
 * stride.  The index register RADEON_SE_TCL_SCALAR_INDX_REG is programmed
 * from offset and stride, `count` dwords from cmdbuf->buf are streamed
 * into RADEON_SE_TCL_SCALAR_DATA_REG, and cmdbuf is advanced past them.
 *
 * Returns 0 on success, or -EINVAL when the header asks for more data
 * than the command buffer still holds.
 */
static __inline__ int radeon_emit_scalars(drm_radeon_private_t *dev_priv,
					  drm_radeon_cmd_header_t header,
					  drm_radeon_kcmd_buffer_t *cmdbuf)
{
	int sz = header.scalars.count;
	int start = header.scalars.offset;
	int stride = header.scalars.stride;
	RING_LOCALS;

	/* The count is taken from the header as-is, so make sure the data
	 * really is in the buffer before reading it, as
	 * radeon_emit_packets() and radeon_emit_veclinear() do.
	 */
	if (sz * (int)sizeof(int) > cmdbuf->bufsz)
		return -EINVAL;

	BEGIN_RING(3 + sz);
	OUT_RING(CP_PACKET0(RADEON_SE_TCL_SCALAR_INDX_REG, 0));
	OUT_RING(start | (stride << RADEON_SCAL_INDX_DWORD_STRIDE_SHIFT));
	OUT_RING(CP_PACKET0_TABLE(RADEON_SE_TCL_SCALAR_DATA_REG, sz - 1));
	OUT_RING_TABLE(cmdbuf->buf, sz);
	ADVANCE_RING();
	cmdbuf->buf += sz * sizeof(int);
	cmdbuf->bufsz -= sz * sizeof(int);
	return 0;
}
2652 
2653 /* God this is ugly
2654  */
radeon_emit_scalars2(drm_radeon_private_t * dev_priv,drm_radeon_cmd_header_t header,drm_radeon_kcmd_buffer_t * cmdbuf)2655 static __inline__ int radeon_emit_scalars2(drm_radeon_private_t *dev_priv,
2656 					   drm_radeon_cmd_header_t header,
2657 					   drm_radeon_kcmd_buffer_t *cmdbuf)
2658 {
2659 	int sz = header.scalars.count;
2660 	int start = ((unsigned int)header.scalars.offset) + 0x100;
2661 	int stride = header.scalars.stride;
2662 	RING_LOCALS;
2663 
2664 	BEGIN_RING(3 + sz);
2665 	OUT_RING(CP_PACKET0(RADEON_SE_TCL_SCALAR_INDX_REG, 0));
2666 	OUT_RING(start | (stride << RADEON_SCAL_INDX_DWORD_STRIDE_SHIFT));
2667 	OUT_RING(CP_PACKET0_TABLE(RADEON_SE_TCL_SCALAR_DATA_REG, sz - 1));
2668 	OUT_RING_TABLE(cmdbuf->buf, sz);
2669 	ADVANCE_RING();
2670 	cmdbuf->buf += sz * sizeof(int);
2671 	cmdbuf->bufsz -= sz * sizeof(int);
2672 	return 0;
2673 }
2674 
/* Emit vector data from the command buffer.
 *
 * header.vectors supplies the dword count, the start offset and the
 * stride.  After a write of 0 to RADEON_SE_TCL_STATE_FLUSH, the index
 * register RADEON_SE_TCL_VECTOR_INDX_REG is programmed from offset and
 * stride, `count` dwords from cmdbuf->buf are streamed into
 * RADEON_SE_TCL_VECTOR_DATA_REG, and cmdbuf is advanced past them.
 *
 * Returns 0 on success, or -EINVAL when the header asks for more data
 * than the command buffer still holds.
 */
static __inline__ int radeon_emit_vectors(drm_radeon_private_t *dev_priv,
					  drm_radeon_cmd_header_t header,
					  drm_radeon_kcmd_buffer_t *cmdbuf)
{
	int sz = header.vectors.count;
	int start = header.vectors.offset;
	int stride = header.vectors.stride;
	RING_LOCALS;

	/* The count is taken from the header as-is, so make sure the data
	 * really is in the buffer before reading it, as
	 * radeon_emit_packets() and radeon_emit_veclinear() do.
	 */
	if (sz * (int)sizeof(int) > cmdbuf->bufsz)
		return -EINVAL;

	BEGIN_RING(5 + sz);
	OUT_RING_REG(RADEON_SE_TCL_STATE_FLUSH, 0);
	OUT_RING(CP_PACKET0(RADEON_SE_TCL_VECTOR_INDX_REG, 0));
	OUT_RING(start | (stride << RADEON_VEC_INDX_OCTWORD_STRIDE_SHIFT));
	OUT_RING(CP_PACKET0_TABLE(RADEON_SE_TCL_VECTOR_DATA_REG, (sz - 1)));
	OUT_RING_TABLE(cmdbuf->buf, sz);
	ADVANCE_RING();

	cmdbuf->buf += sz * sizeof(int);
	cmdbuf->bufsz -= sz * sizeof(int);
	return 0;
}
2696 
/* Emit linearly addressed vector data from the command buffer.
 *
 * header.veclinear.count is multiplied by 4 to get the dword count, and
 * the start index is assembled from addr_lo and addr_hi.  The data is
 * written with a stride of 1 and cmdbuf is advanced past it.
 *
 * Returns 0 on success (including a zero count, which emits nothing),
 * or -EINVAL when the data would run past the end of the command buffer.
 */
static __inline__ int radeon_emit_veclinear(drm_radeon_private_t *dev_priv,
					  drm_radeon_cmd_header_t header,
					  drm_radeon_kcmd_buffer_t *cmdbuf)
{
	const int ndwords = header.veclinear.count * 4;
	const int first = header.veclinear.addr_lo |
	    (header.veclinear.addr_hi << 8);
	RING_LOCALS;

	if (ndwords == 0)
		return 0;
	if (ndwords * 4 > cmdbuf->bufsz)
		return -EINVAL;

	/* State flush (2) + index write (2) + table header (1) + data. */
	BEGIN_RING(5 + ndwords);
	OUT_RING_REG(RADEON_SE_TCL_STATE_FLUSH, 0);
	OUT_RING(CP_PACKET0(RADEON_SE_TCL_VECTOR_INDX_REG, 0));
	OUT_RING(first | (1 << RADEON_VEC_INDX_OCTWORD_STRIDE_SHIFT));
	OUT_RING(CP_PACKET0_TABLE(RADEON_SE_TCL_VECTOR_DATA_REG, (ndwords - 1)));
	OUT_RING_TABLE(cmdbuf->buf, ndwords);
	ADVANCE_RING();

	cmdbuf->buf += ndwords * sizeof(int);
	cmdbuf->bufsz -= ndwords * sizeof(int);
	return 0;
}
2722 
/* Emit one type-3 packet from the command buffer.
 *
 * radeon_check_and_fixup_packet3() verifies the packet and reports its
 * size in dwords; the packet is then copied to the ring and cmdbuf is
 * advanced past it.
 *
 * Returns 0 on success, or the verifier's error code.
 */
static int radeon_emit_packet3(struct drm_device * dev,
			       struct drm_file *file_priv,
			       drm_radeon_kcmd_buffer_t *cmdbuf)
{
	drm_radeon_private_t *dev_priv = dev->dev_private;
	unsigned int ndwords;
	int err;
	RING_LOCALS;

	DRM_DEBUG("\n");

	err = radeon_check_and_fixup_packet3(dev_priv, file_priv,
					     cmdbuf, &ndwords);
	if (err) {
		DRM_ERROR("Packet verification failed\n");
		return err;
	}

	BEGIN_RING(ndwords);
	OUT_RING_TABLE(cmdbuf->buf, ndwords);
	ADVANCE_RING();

	cmdbuf->buf += ndwords * 4;
	cmdbuf->bufsz -= ndwords * 4;
	return 0;
}
2748 
/* Emit one type-3 packet from the command buffer once per cliprect.
 *
 * After verification, and unless `orig_nbox` is zero, the packet is
 * written to the ring at least once; for every pass below cmdbuf->nbox
 * the matching box is first copied from user space and emitted with
 * radeon_emit_clip_rect().  A box count of exactly 1 is reset to 0
 * afterwards.  In all non-error cases cmdbuf is advanced past the packet.
 *
 * Returns 0 on success, the verifier's error code, or -EFAULT when a box
 * cannot be copied from user space.
 */
static int radeon_emit_packet3_cliprect(struct drm_device *dev,
					struct drm_file *file_priv,
					drm_radeon_kcmd_buffer_t *cmdbuf,
					int orig_nbox)
{
	drm_radeon_private_t *dev_priv = dev->dev_private;
	struct drm_clip_rect __user *user_boxes = cmdbuf->boxes;
	struct drm_clip_rect rect;
	unsigned int ndwords;
	int pass = 0;
	int err;
	RING_LOCALS;

	DRM_DEBUG("\n");

	err = radeon_check_and_fixup_packet3(dev_priv, file_priv,
					     cmdbuf, &ndwords);
	if (err) {
		DRM_ERROR("Packet verification failed\n");
		return err;
	}

	if (orig_nbox) {
		do {
			if (pass < cmdbuf->nbox) {
				if (DRM_COPY_FROM_USER(&rect, &user_boxes[pass],
						       sizeof(rect)))
					return -EFAULT;

				/* FIXME (workaround noted by Tim Smith): on
				 * the second and later passes, send a
				 * WAIT_UNTIL_3D_IDLE before emitting the
				 * cliprect.  This avoids a lockup on fast
				 * machines when several cliprects are sent
				 * with one cmdbuf, e.g. when waving a 2D
				 * window over a 3D window.  Something in the
				 * user-space commands seems to hang the card
				 * when repeated back to back; that is where
				 * the real fix belongs.
				 */
				if (pass != 0) {
					BEGIN_RING(2);
					RADEON_WAIT_UNTIL_3D_IDLE();
					ADVANCE_RING();
				}
				radeon_emit_clip_rect(dev_priv, &rect);
			}

			BEGIN_RING(ndwords);
			OUT_RING_TABLE(cmdbuf->buf, ndwords);
			ADVANCE_RING();
		} while (++pass < cmdbuf->nbox);

		if (cmdbuf->nbox == 1)
			cmdbuf->nbox = 0;
	}

	/* Consume the packet whether or not it was emitted. */
	cmdbuf->buf += ndwords * 4;
	cmdbuf->bufsz -= ndwords * 4;
	return 0;
}
2810 
radeon_emit_wait(struct drm_device * dev,int flags)2811 static int radeon_emit_wait(struct drm_device * dev, int flags)
2812 {
2813 	drm_radeon_private_t *dev_priv = dev->dev_private;
2814 	RING_LOCALS;
2815 
2816 	DRM_DEBUG("%x\n", flags);
2817 	switch (flags) {
2818 	case RADEON_WAIT_2D:
2819 		BEGIN_RING(2);
2820 		RADEON_WAIT_UNTIL_2D_IDLE();
2821 		ADVANCE_RING();
2822 		break;
2823 	case RADEON_WAIT_3D:
2824 		BEGIN_RING(2);
2825 		RADEON_WAIT_UNTIL_3D_IDLE();
2826 		ADVANCE_RING();
2827 		break;
2828 	case RADEON_WAIT_2D | RADEON_WAIT_3D:
2829 		BEGIN_RING(2);
2830 		RADEON_WAIT_UNTIL_IDLE();
2831 		ADVANCE_RING();
2832 		break;
2833 	default:
2834 		return -EINVAL;
2835 	}
2836 
2837 	return 0;
2838 }
2839 
radeon_cp_cmdbuf(struct drm_device * dev,void * data,struct drm_file * file_priv)2840 static int radeon_cp_cmdbuf(struct drm_device *dev, void *data, struct drm_file *file_priv)
2841 {
2842 	drm_radeon_private_t *dev_priv = dev->dev_private;
2843 	struct drm_device_dma *dma = dev->dma;
2844 	struct drm_buf *buf = NULL;
2845 	int idx;
2846 	drm_radeon_kcmd_buffer_t *cmdbuf = data;
2847 	drm_radeon_cmd_header_t header;
2848 	int orig_nbox, orig_bufsz;
2849 	char *kbuf = NULL;
2850 
2851 	LOCK_TEST_WITH_RETURN(dev, file_priv);
2852 
2853 	RING_SPACE_TEST_WITH_RETURN(dev_priv);
2854 	VB_AGE_TEST_WITH_RETURN(dev_priv);
2855 
2856 	if (cmdbuf->bufsz > 64 * 1024 || cmdbuf->bufsz < 0) {
2857 		return -EINVAL;
2858 	}
2859 
2860 	/* Allocate an in-kernel area and copy in the cmdbuf.  Do this to avoid
2861 	 * races between checking values and using those values in other code,
2862 	 * and simply to avoid a lot of function calls to copy in data.
2863 	 */
2864 	orig_bufsz = cmdbuf->bufsz;
2865 	if (orig_bufsz != 0) {
2866 		kbuf = drm_alloc(cmdbuf->bufsz, DRM_MEM_DRIVER);
2867 		if (kbuf == NULL)
2868 			return -ENOMEM;
2869 		if (DRM_COPY_FROM_USER(kbuf, (void __user *)cmdbuf->buf,
2870 				       cmdbuf->bufsz)) {
2871 			drm_free(kbuf, orig_bufsz, DRM_MEM_DRIVER);
2872 			return -EFAULT;
2873 		}
2874 		cmdbuf->buf = kbuf;
2875 	}
2876 
2877 	orig_nbox = cmdbuf->nbox;
2878 
2879 	if (dev_priv->microcode_version == UCODE_R300) {
2880 		int temp;
2881 		temp = r300_do_cp_cmdbuf(dev, file_priv, cmdbuf);
2882 
2883 		if (orig_bufsz != 0)
2884 			drm_free(kbuf, orig_bufsz, DRM_MEM_DRIVER);
2885 
2886 		return temp;
2887 	}
2888 
2889 	/* microcode_version != r300 */
2890 	while (cmdbuf->bufsz >= sizeof(header)) {
2891 
2892 		header.i = *(int *)cmdbuf->buf;
2893 		cmdbuf->buf += sizeof(header);
2894 		cmdbuf->bufsz -= sizeof(header);
2895 
2896 		switch (header.header.cmd_type) {
2897 		case RADEON_CMD_PACKET:
2898 			DRM_DEBUG("RADEON_CMD_PACKET\n");
2899 			if (radeon_emit_packets
2900 			    (dev_priv, file_priv, header, cmdbuf)) {
2901 				DRM_ERROR("radeon_emit_packets failed\n");
2902 				goto err;
2903 			}
2904 			break;
2905 
2906 		case RADEON_CMD_SCALARS:
2907 			DRM_DEBUG("RADEON_CMD_SCALARS\n");
2908 			if (radeon_emit_scalars(dev_priv, header, cmdbuf)) {
2909 				DRM_ERROR("radeon_emit_scalars failed\n");
2910 				goto err;
2911 			}
2912 			break;
2913 
2914 		case RADEON_CMD_VECTORS:
2915 			DRM_DEBUG("RADEON_CMD_VECTORS\n");
2916 			if (radeon_emit_vectors(dev_priv, header, cmdbuf)) {
2917 				DRM_ERROR("radeon_emit_vectors failed\n");
2918 				goto err;
2919 			}
2920 			break;
2921 
2922 		case RADEON_CMD_DMA_DISCARD:
2923 			DRM_DEBUG("RADEON_CMD_DMA_DISCARD\n");
2924 			idx = header.dma.buf_idx;
2925 			if (idx < 0 || idx >= dma->buf_count) {
2926 				DRM_ERROR("buffer index %d (of %d max)\n",
2927 					  idx, dma->buf_count - 1);
2928 				goto err;
2929 			}
2930 
2931 			buf = dma->buflist[idx];
2932 			if (buf->file_priv != file_priv || buf->pending) {
2933 				DRM_ERROR("bad buffer %p %p %d\n",
2934 					  buf->file_priv, file_priv,
2935 					  buf->pending);
2936 				goto err;
2937 			}
2938 
2939 			radeon_cp_discard_buffer(dev, buf);
2940 			break;
2941 
2942 		case RADEON_CMD_PACKET3:
2943 			DRM_DEBUG("RADEON_CMD_PACKET3\n");
2944 			if (radeon_emit_packet3(dev, file_priv, cmdbuf)) {
2945 				DRM_ERROR("radeon_emit_packet3 failed\n");
2946 				goto err;
2947 			}
2948 			break;
2949 
2950 		case RADEON_CMD_PACKET3_CLIP:
2951 			DRM_DEBUG("RADEON_CMD_PACKET3_CLIP\n");
2952 			if (radeon_emit_packet3_cliprect
2953 			    (dev, file_priv, cmdbuf, orig_nbox)) {
2954 				DRM_ERROR("radeon_emit_packet3_clip failed\n");
2955 				goto err;
2956 			}
2957 			break;
2958 
2959 		case RADEON_CMD_SCALARS2:
2960 			DRM_DEBUG("RADEON_CMD_SCALARS2\n");
2961 			if (radeon_emit_scalars2(dev_priv, header, cmdbuf)) {
2962 				DRM_ERROR("radeon_emit_scalars2 failed\n");
2963 				goto err;
2964 			}
2965 			break;
2966 
2967 		case RADEON_CMD_WAIT:
2968 			DRM_DEBUG("RADEON_CMD_WAIT\n");
2969 			if (radeon_emit_wait(dev, header.wait.flags)) {
2970 				DRM_ERROR("radeon_emit_wait failed\n");
2971 				goto err;
2972 			}
2973 			break;
2974 		case RADEON_CMD_VECLINEAR:
2975 			DRM_DEBUG("RADEON_CMD_VECLINEAR\n");
2976 			if (radeon_emit_veclinear(dev_priv, header, cmdbuf)) {
2977 				DRM_ERROR("radeon_emit_veclinear failed\n");
2978 				goto err;
2979 			}
2980 			break;
2981 
2982 		default:
2983 			DRM_ERROR("bad cmd_type %d at %p\n",
2984 				  header.header.cmd_type,
2985 				  cmdbuf->buf - sizeof(header));
2986 			goto err;
2987 		}
2988 	}
2989 
2990 	if (orig_bufsz != 0)
2991 		drm_free(kbuf, orig_bufsz, DRM_MEM_DRIVER);
2992 
2993 	DRM_DEBUG("DONE\n");
2994 	COMMIT_RING();
2995 	return 0;
2996 
2997       err:
2998 	if (orig_bufsz != 0)
2999 		drm_free(kbuf, orig_bufsz, DRM_MEM_DRIVER);
3000 	return -EINVAL;
3001 }
3002 
radeon_cp_getparam(struct drm_device * dev,void * data,struct drm_file * file_priv)3003 static int radeon_cp_getparam(struct drm_device *dev, void *data, struct drm_file *file_priv)
3004 {
3005 	drm_radeon_private_t *dev_priv = dev->dev_private;
3006 	drm_radeon_getparam_t *param = data;
3007 	int value;
3008 
3009 	DRM_DEBUG("pid=%d\n", DRM_CURRENTPID);
3010 
3011 	switch (param->param) {
3012 	case RADEON_PARAM_GART_BUFFER_OFFSET:
3013 		value = dev_priv->gart_buffers_offset;
3014 		break;
3015 	case RADEON_PARAM_LAST_FRAME:
3016 		dev_priv->stats.last_frame_reads++;
3017 		value = GET_SCRATCH(dev_priv, 0);
3018 		break;
3019 	case RADEON_PARAM_LAST_DISPATCH:
3020 		value = GET_SCRATCH(dev_priv, 1);
3021 		break;
3022 	case RADEON_PARAM_LAST_CLEAR:
3023 		dev_priv->stats.last_clear_reads++;
3024 		value = GET_SCRATCH(dev_priv, 2);
3025 		break;
3026 	case RADEON_PARAM_IRQ_NR:
3027 		if ((dev_priv->flags & RADEON_FAMILY_MASK) >= CHIP_R600)
3028 			value = 0;
3029 		else
3030 			value = dev->irq;
3031 		break;
3032 	case RADEON_PARAM_GART_BASE:
3033 		value = dev_priv->gart_vm_start;
3034 		break;
3035 	case RADEON_PARAM_REGISTER_HANDLE:
3036 		value = dev_priv->mmio->offset;
3037 		break;
3038 	case RADEON_PARAM_STATUS_HANDLE:
3039 		value = dev_priv->ring_rptr_offset;
3040 		break;
3041 #ifndef __LP64__
3042 		/*
3043 		 * This ioctl() doesn't work on 64-bit platforms because hw_lock is a
3044 		 * pointer which can't fit into an int-sized variable.  According to
3045 		 * Michel Dänzer, the ioctl() is only used on embedded platforms, so
3046 		 * not supporting it shouldn't be a problem.  If the same functionality
3047 		 * is needed on 64-bit platforms, a new ioctl() would have to be added,
3048 		 * so backwards-compatibility for the embedded platforms can be
3049 		 * maintained.  --davidm 4-Feb-2004.
3050 		 */
3051 	case RADEON_PARAM_SAREA_HANDLE:
3052 		/* The lock is the first dword in the sarea. */
3053 		value = (long)dev->lock.hw_lock;
3054 		break;
3055 #endif
3056 	case RADEON_PARAM_GART_TEX_HANDLE:
3057 		value = dev_priv->gart_textures_offset;
3058 		break;
3059 	case RADEON_PARAM_SCRATCH_OFFSET:
3060 		if (!dev_priv->writeback_works)
3061 			return -EINVAL;
3062 		if ((dev_priv->flags & RADEON_FAMILY_MASK) >= CHIP_R600)
3063 			value = R600_SCRATCH_REG_OFFSET;
3064 		else
3065 			value = RADEON_SCRATCH_REG_OFFSET;
3066 		break;
3067 	case RADEON_PARAM_CARD_TYPE:
3068 		if (dev_priv->flags & RADEON_IS_PCIE)
3069 			value = RADEON_CARD_PCIE;
3070 		else if (dev_priv->flags & RADEON_IS_AGP)
3071 			value = RADEON_CARD_AGP;
3072 		else
3073 			value = RADEON_CARD_PCI;
3074 		break;
3075 	case RADEON_PARAM_VBLANK_CRTC:
3076 		value = radeon_vblank_crtc_get(dev);
3077 		break;
3078 	case RADEON_PARAM_FB_LOCATION:
3079 		value = radeon_read_fb_location(dev_priv);
3080 		break;
3081 	case RADEON_PARAM_NUM_GB_PIPES:
3082 		value = dev_priv->num_gb_pipes;
3083 		break;
3084 	case RADEON_PARAM_NUM_Z_PIPES:
3085 		value = dev_priv->num_z_pipes;
3086 		break;
3087 	default:
3088 		DRM_DEBUG("Invalid parameter %d\n", param->param);
3089 		return -EINVAL;
3090 	}
3091 
3092 	if (DRM_COPY_TO_USER(param->value, &value, sizeof(int))) {
3093 		DRM_ERROR("copy_to_user\n");
3094 		return -EFAULT;
3095 	}
3096 
3097 	return 0;
3098 }
3099 
radeon_cp_setparam(struct drm_device * dev,void * data,struct drm_file * file_priv)3100 static int radeon_cp_setparam(struct drm_device *dev, void *data, struct drm_file *file_priv)
3101 {
3102 	drm_radeon_private_t *dev_priv = dev->dev_private;
3103 	drm_radeon_setparam_t *sp = data;
3104 	struct drm_radeon_driver_file_fields *radeon_priv;
3105 
3106 	switch (sp->param) {
3107 	case RADEON_SETPARAM_FB_LOCATION:
3108 		radeon_priv = file_priv->driver_priv;
3109 		radeon_priv->radeon_fb_delta = dev_priv->fb_location -
3110 		    sp->value;
3111 		break;
3112 	case RADEON_SETPARAM_SWITCH_TILING:
3113 		if (sp->value == 0) {
3114 			DRM_DEBUG("color tiling disabled\n");
3115 			dev_priv->front_pitch_offset &= ~RADEON_DST_TILE_MACRO;
3116 			dev_priv->back_pitch_offset &= ~RADEON_DST_TILE_MACRO;
3117 			if (dev_priv->sarea_priv)
3118 				dev_priv->sarea_priv->tiling_enabled = 0;
3119 		} else if (sp->value == 1) {
3120 			DRM_DEBUG("color tiling enabled\n");
3121 			dev_priv->front_pitch_offset |= RADEON_DST_TILE_MACRO;
3122 			dev_priv->back_pitch_offset |= RADEON_DST_TILE_MACRO;
3123 			if (dev_priv->sarea_priv)
3124 				dev_priv->sarea_priv->tiling_enabled = 1;
3125 		}
3126 		break;
3127 	case RADEON_SETPARAM_PCIGART_LOCATION:
3128 		dev_priv->pcigart_offset = sp->value;
3129 		dev_priv->pcigart_offset_set = 1;
3130 		break;
3131 	case RADEON_SETPARAM_NEW_MEMMAP:
3132 		dev_priv->new_memmap = sp->value;
3133 		break;
3134 	case RADEON_SETPARAM_PCIGART_TABLE_SIZE:
3135 		dev_priv->gart_info.table_size = sp->value;
3136 		if (dev_priv->gart_info.table_size < RADEON_PCIGART_TABLE_SIZE)
3137 			dev_priv->gart_info.table_size = RADEON_PCIGART_TABLE_SIZE;
3138 		break;
3139 	case RADEON_SETPARAM_VBLANK_CRTC:
3140 		return radeon_vblank_crtc_set(dev, sp->value);
3141 		break;
3142 	default:
3143 		DRM_DEBUG("Invalid parameter %d\n", sp->param);
3144 		return -EINVAL;
3145 	}
3146 
3147 	return 0;
3148 }
3149 
3150 /* When a client dies:
3151  *    - Check for and clean up flipped page state
3152  *    - Free any alloced GART memory.
3153  *    - Free any alloced radeon surfaces.
3154  *
3155  * DRM infrastructure takes care of reclaiming dma buffers.
3156  */
radeon_driver_preclose(struct drm_device * dev,struct drm_file * file_priv)3157 void radeon_driver_preclose(struct drm_device *dev, struct drm_file *file_priv)
3158 {
3159 	if (dev->dev_private) {
3160 		drm_radeon_private_t *dev_priv = dev->dev_private;
3161 		dev_priv->page_flipping = 0;
3162 		radeon_mem_release(file_priv, dev_priv->gart_heap);
3163 		radeon_mem_release(file_priv, dev_priv->fb_heap);
3164 		radeon_surfaces_release(file_priv, dev_priv);
3165 	}
3166 }
3167 
radeon_driver_lastclose(struct drm_device * dev)3168 void radeon_driver_lastclose(struct drm_device *dev)
3169 {
3170 	radeon_surfaces_release(PCIGART_FILE_PRIV, dev->dev_private);
3171 	if (dev->dev_private) {
3172 		drm_radeon_private_t *dev_priv = dev->dev_private;
3173 
3174 		if (dev_priv->sarea_priv &&
3175 		    dev_priv->sarea_priv->pfCurrentPage != 0)
3176 			radeon_cp_dispatch_flip(dev);
3177 	}
3178 
3179 	radeon_do_release(dev);
3180 }
3181 
radeon_driver_open(struct drm_device * dev,struct drm_file * file_priv)3182 int radeon_driver_open(struct drm_device *dev, struct drm_file *file_priv)
3183 {
3184 	drm_radeon_private_t *dev_priv = dev->dev_private;
3185 	struct drm_radeon_driver_file_fields *radeon_priv;
3186 
3187 	DRM_DEBUG("\n");
3188 	radeon_priv =
3189 	    (struct drm_radeon_driver_file_fields *)
3190 	    drm_alloc(sizeof(*radeon_priv), DRM_MEM_FILES);
3191 
3192 	if (!radeon_priv)
3193 		return -ENOMEM;
3194 
3195 	file_priv->driver_priv = radeon_priv;
3196 
3197 	if (dev_priv)
3198 		radeon_priv->radeon_fb_delta = dev_priv->fb_location;
3199 	else
3200 		radeon_priv->radeon_fb_delta = 0;
3201 	return 0;
3202 }
3203 
radeon_driver_postclose(struct drm_device * dev,struct drm_file * file_priv)3204 void radeon_driver_postclose(struct drm_device *dev, struct drm_file *file_priv)
3205 {
3206 	struct drm_radeon_driver_file_fields *radeon_priv =
3207 	    file_priv->driver_priv;
3208 
3209 	drm_free(radeon_priv, sizeof(*radeon_priv), DRM_MEM_FILES);
3210 }
3211 
3212 struct drm_ioctl_desc radeon_ioctls[] = {
3213 	DRM_IOCTL_DEF(DRM_RADEON_CP_INIT, radeon_cp_init, DRM_AUTH|DRM_MASTER|DRM_ROOT_ONLY),
3214 	DRM_IOCTL_DEF(DRM_RADEON_CP_START, radeon_cp_start, DRM_AUTH|DRM_MASTER|DRM_ROOT_ONLY),
3215 	DRM_IOCTL_DEF(DRM_RADEON_CP_STOP, radeon_cp_stop, DRM_AUTH|DRM_MASTER|DRM_ROOT_ONLY),
3216 	DRM_IOCTL_DEF(DRM_RADEON_CP_RESET, radeon_cp_reset, DRM_AUTH|DRM_MASTER|DRM_ROOT_ONLY),
3217 	DRM_IOCTL_DEF(DRM_RADEON_CP_IDLE, radeon_cp_idle, DRM_AUTH),
3218 	DRM_IOCTL_DEF(DRM_RADEON_CP_RESUME, radeon_cp_resume, DRM_AUTH),
3219 	DRM_IOCTL_DEF(DRM_RADEON_RESET, radeon_engine_reset, DRM_AUTH),
3220 	DRM_IOCTL_DEF(DRM_RADEON_FULLSCREEN, radeon_fullscreen, DRM_AUTH),
3221 	DRM_IOCTL_DEF(DRM_RADEON_SWAP, radeon_cp_swap, DRM_AUTH),
3222 	DRM_IOCTL_DEF(DRM_RADEON_CLEAR, radeon_cp_clear, DRM_AUTH),
3223 	DRM_IOCTL_DEF(DRM_RADEON_VERTEX, radeon_cp_vertex, DRM_AUTH),
3224 	DRM_IOCTL_DEF(DRM_RADEON_INDICES, radeon_cp_indices, DRM_AUTH),
3225 	DRM_IOCTL_DEF(DRM_RADEON_TEXTURE, radeon_cp_texture, DRM_AUTH),
3226 	DRM_IOCTL_DEF(DRM_RADEON_STIPPLE, radeon_cp_stipple, DRM_AUTH),
3227 	DRM_IOCTL_DEF(DRM_RADEON_INDIRECT, radeon_cp_indirect, DRM_AUTH|DRM_ROOT_ONLY),
3228 	DRM_IOCTL_DEF(DRM_RADEON_VERTEX2, radeon_cp_vertex2, DRM_AUTH),
3229 	DRM_IOCTL_DEF(DRM_RADEON_CMDBUF, radeon_cp_cmdbuf, DRM_AUTH),
3230 	DRM_IOCTL_DEF(DRM_RADEON_GETPARAM, radeon_cp_getparam, DRM_AUTH),
3231 	DRM_IOCTL_DEF(DRM_RADEON_FLIP, radeon_cp_flip, DRM_AUTH),
3232 	DRM_IOCTL_DEF(DRM_RADEON_ALLOC, radeon_mem_alloc, DRM_AUTH),
3233 	DRM_IOCTL_DEF(DRM_RADEON_FREE, radeon_mem_free, DRM_AUTH),
3234 	DRM_IOCTL_DEF(DRM_RADEON_INIT_HEAP, radeon_mem_init_heap, DRM_AUTH|DRM_MASTER|DRM_ROOT_ONLY),
3235 	DRM_IOCTL_DEF(DRM_RADEON_IRQ_EMIT, radeon_irq_emit, DRM_AUTH),
3236 	DRM_IOCTL_DEF(DRM_RADEON_IRQ_WAIT, radeon_irq_wait, DRM_AUTH),
3237 	DRM_IOCTL_DEF(DRM_RADEON_SETPARAM, radeon_cp_setparam, DRM_AUTH),
3238 	DRM_IOCTL_DEF(DRM_RADEON_SURF_ALLOC, radeon_surface_alloc, DRM_AUTH),
3239 	DRM_IOCTL_DEF(DRM_RADEON_SURF_FREE, radeon_surface_free, DRM_AUTH),
3240 	DRM_IOCTL_DEF(DRM_RADEON_CS, radeon_cs_ioctl, DRM_AUTH)
3241 };
3242 
3243 int radeon_max_ioctl = DRM_ARRAY_SIZE(radeon_ioctls);
3244