1 /*
2  * Copyright 2009 Advanced Micro Devices, Inc.
3  *
4  * Permission is hereby granted, free of charge, to any person obtaining a
5  * copy of this software and associated documentation files (the "Software"),
6  * to deal in the Software without restriction, including without limitation
7  * the rights to use, copy, modify, merge, publish, distribute, sublicense,
8  * and/or sell copies of the Software, and to permit persons to whom the
9  * Software is furnished to do so, subject to the following conditions:
10  *
11  * The above copyright notice and this permission notice (including the next
12  * paragraph) shall be included in all copies or substantial portions of the
13  * Software.
14  *
15  * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
16  * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
17  * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT.  IN NO EVENT SHALL
18  * THE COPYRIGHT HOLDER(S) AND/OR ITS SUPPLIERS BE LIABLE FOR ANY CLAIM, DAMAGES OR
19  * OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE,
20  * ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER
21  * DEALINGS IN THE SOFTWARE.
22  *
23  * Authors:
24  *     Alex Deucher <alexander.deucher@amd.com>
25  */
26 
27 #include <sys/cdefs.h>
28 __FBSDID("$FreeBSD: stable/10/sys/dev/drm2/radeon/r600_blit.c 282199 2015-04-28 19:35:05Z dumbbell $");
29 
30 #include <dev/drm2/drmP.h>
31 #include <dev/drm2/radeon/radeon_drm.h>
32 #include "radeon_drv.h"
33 
34 #include "r600_blit_shaders.h"
35 
/* Draw-initiator values emitted by draw_auto(). */
#define DI_PT_RECTLIST		0x11	/* written to VGT_PRIMITIVE_TYPE */
#define DI_INDEX_SIZE_16_BIT	0x0	/* payload of the INDEX_TYPE packet */
#define DI_SRC_SEL_AUTO_INDEX	0x2	/* source select of DRAW_INDEX_AUTO */

/* Texture formats handed to set_tex_resource(). */
#define FMT_8			0x1
#define FMT_5_6_5		0x8
#define FMT_8_8_8_8		0x1a

/* Colour-buffer formats handed to set_render_target(). */
#define COLOR_8			0x1
#define COLOR_5_6_5		0x8
#define COLOR_8_8_8_8		0x1a
46 
47 static void
set_render_target(drm_radeon_private_t * dev_priv,int format,int w,int h,u64 gpu_addr)48 set_render_target(drm_radeon_private_t *dev_priv, int format, int w, int h, u64 gpu_addr)
49 {
50 	u32 cb_color_info;
51 	int pitch, slice;
52 	RING_LOCALS;
53 	DRM_DEBUG("\n");
54 
55 	h = roundup2(h, 8);
56 	if (h < 8)
57 		h = 8;
58 
59 	cb_color_info = ((format << 2) | (1 << 27));
60 	pitch = (w / 8) - 1;
61 	slice = ((w * h) / 64) - 1;
62 
63 	if (((dev_priv->flags & RADEON_FAMILY_MASK) > CHIP_R600) &&
64 	    ((dev_priv->flags & RADEON_FAMILY_MASK) < CHIP_RV770)) {
65 		BEGIN_RING(21 + 2);
66 		OUT_RING(CP_PACKET3(R600_IT_SET_CONTEXT_REG, 1));
67 		OUT_RING((R600_CB_COLOR0_BASE - R600_SET_CONTEXT_REG_OFFSET) >> 2);
68 		OUT_RING(gpu_addr >> 8);
69 		OUT_RING(CP_PACKET3(R600_IT_SURFACE_BASE_UPDATE, 0));
70 		OUT_RING(2 << 0);
71 	} else {
72 		BEGIN_RING(21);
73 		OUT_RING(CP_PACKET3(R600_IT_SET_CONTEXT_REG, 1));
74 		OUT_RING((R600_CB_COLOR0_BASE - R600_SET_CONTEXT_REG_OFFSET) >> 2);
75 		OUT_RING(gpu_addr >> 8);
76 	}
77 
78 	OUT_RING(CP_PACKET3(R600_IT_SET_CONTEXT_REG, 1));
79 	OUT_RING((R600_CB_COLOR0_SIZE - R600_SET_CONTEXT_REG_OFFSET) >> 2);
80 	OUT_RING((pitch << 0) | (slice << 10));
81 
82 	OUT_RING(CP_PACKET3(R600_IT_SET_CONTEXT_REG, 1));
83 	OUT_RING((R600_CB_COLOR0_VIEW - R600_SET_CONTEXT_REG_OFFSET) >> 2);
84 	OUT_RING(0);
85 
86 	OUT_RING(CP_PACKET3(R600_IT_SET_CONTEXT_REG, 1));
87 	OUT_RING((R600_CB_COLOR0_INFO - R600_SET_CONTEXT_REG_OFFSET) >> 2);
88 	OUT_RING(cb_color_info);
89 
90 	OUT_RING(CP_PACKET3(R600_IT_SET_CONTEXT_REG, 1));
91 	OUT_RING((R600_CB_COLOR0_TILE - R600_SET_CONTEXT_REG_OFFSET) >> 2);
92 	OUT_RING(0);
93 
94 	OUT_RING(CP_PACKET3(R600_IT_SET_CONTEXT_REG, 1));
95 	OUT_RING((R600_CB_COLOR0_FRAG - R600_SET_CONTEXT_REG_OFFSET) >> 2);
96 	OUT_RING(0);
97 
98 	OUT_RING(CP_PACKET3(R600_IT_SET_CONTEXT_REG, 1));
99 	OUT_RING((R600_CB_COLOR0_MASK - R600_SET_CONTEXT_REG_OFFSET) >> 2);
100 	OUT_RING(0);
101 
102 	ADVANCE_RING();
103 }
104 
105 static void
cp_set_surface_sync(drm_radeon_private_t * dev_priv,u32 sync_type,u32 size,u64 mc_addr)106 cp_set_surface_sync(drm_radeon_private_t *dev_priv,
107 		    u32 sync_type, u32 size, u64 mc_addr)
108 {
109 	u32 cp_coher_size;
110 	RING_LOCALS;
111 	DRM_DEBUG("\n");
112 
113 	if (size == 0xffffffff)
114 		cp_coher_size = 0xffffffff;
115 	else
116 		cp_coher_size = ((size + 255) >> 8);
117 
118 	BEGIN_RING(5);
119 	OUT_RING(CP_PACKET3(R600_IT_SURFACE_SYNC, 3));
120 	OUT_RING(sync_type);
121 	OUT_RING(cp_coher_size);
122 	OUT_RING((mc_addr >> 8));
123 	OUT_RING(10); /* poll interval */
124 	ADVANCE_RING();
125 }
126 
127 static void
set_shaders(struct drm_device * dev)128 set_shaders(struct drm_device *dev)
129 {
130 	drm_radeon_private_t *dev_priv = dev->dev_private;
131 	u64 gpu_addr;
132 	int i;
133 	u32 *vs, *ps;
134 	uint32_t sq_pgm_resources;
135 	RING_LOCALS;
136 	DRM_DEBUG("\n");
137 
138 	/* load shaders */
139 	vs = (u32 *) ((char *)dev->agp_buffer_map->handle + dev_priv->blit_vb->offset);
140 	ps = (u32 *) ((char *)dev->agp_buffer_map->handle + dev_priv->blit_vb->offset + 256);
141 
142 	for (i = 0; i < r6xx_vs_size; i++)
143 		vs[i] = cpu_to_le32(r6xx_vs[i]);
144 	for (i = 0; i < r6xx_ps_size; i++)
145 		ps[i] = cpu_to_le32(r6xx_ps[i]);
146 
147 	dev_priv->blit_vb->used = 512;
148 
149 	gpu_addr = dev_priv->gart_buffers_offset + dev_priv->blit_vb->offset;
150 
151 	/* setup shader regs */
152 	sq_pgm_resources = (1 << 0);
153 
154 	BEGIN_RING(9 + 12);
155 	/* VS */
156 	OUT_RING(CP_PACKET3(R600_IT_SET_CONTEXT_REG, 1));
157 	OUT_RING((R600_SQ_PGM_START_VS - R600_SET_CONTEXT_REG_OFFSET) >> 2);
158 	OUT_RING(gpu_addr >> 8);
159 
160 	OUT_RING(CP_PACKET3(R600_IT_SET_CONTEXT_REG, 1));
161 	OUT_RING((R600_SQ_PGM_RESOURCES_VS - R600_SET_CONTEXT_REG_OFFSET) >> 2);
162 	OUT_RING(sq_pgm_resources);
163 
164 	OUT_RING(CP_PACKET3(R600_IT_SET_CONTEXT_REG, 1));
165 	OUT_RING((R600_SQ_PGM_CF_OFFSET_VS - R600_SET_CONTEXT_REG_OFFSET) >> 2);
166 	OUT_RING(0);
167 
168 	/* PS */
169 	OUT_RING(CP_PACKET3(R600_IT_SET_CONTEXT_REG, 1));
170 	OUT_RING((R600_SQ_PGM_START_PS - R600_SET_CONTEXT_REG_OFFSET) >> 2);
171 	OUT_RING((gpu_addr + 256) >> 8);
172 
173 	OUT_RING(CP_PACKET3(R600_IT_SET_CONTEXT_REG, 1));
174 	OUT_RING((R600_SQ_PGM_RESOURCES_PS - R600_SET_CONTEXT_REG_OFFSET) >> 2);
175 	OUT_RING(sq_pgm_resources | (1 << 28));
176 
177 	OUT_RING(CP_PACKET3(R600_IT_SET_CONTEXT_REG, 1));
178 	OUT_RING((R600_SQ_PGM_EXPORTS_PS - R600_SET_CONTEXT_REG_OFFSET) >> 2);
179 	OUT_RING(2);
180 
181 	OUT_RING(CP_PACKET3(R600_IT_SET_CONTEXT_REG, 1));
182 	OUT_RING((R600_SQ_PGM_CF_OFFSET_PS - R600_SET_CONTEXT_REG_OFFSET) >> 2);
183 	OUT_RING(0);
184 	ADVANCE_RING();
185 
186 	cp_set_surface_sync(dev_priv,
187 			    R600_SH_ACTION_ENA, 512, gpu_addr);
188 }
189 
190 static void
set_vtx_resource(drm_radeon_private_t * dev_priv,u64 gpu_addr)191 set_vtx_resource(drm_radeon_private_t *dev_priv, u64 gpu_addr)
192 {
193 	uint32_t sq_vtx_constant_word2;
194 	RING_LOCALS;
195 	DRM_DEBUG("\n");
196 
197 	sq_vtx_constant_word2 = (((gpu_addr >> 32) & 0xff) | (16 << 8));
198 #ifdef __BIG_ENDIAN
199 	sq_vtx_constant_word2 |= (2U << 30);
200 #endif
201 
202 	BEGIN_RING(9);
203 	OUT_RING(CP_PACKET3(R600_IT_SET_RESOURCE, 7));
204 	OUT_RING(0x460);
205 	OUT_RING(gpu_addr & 0xffffffff);
206 	OUT_RING(48 - 1);
207 	OUT_RING(sq_vtx_constant_word2);
208 	OUT_RING(1 << 0);
209 	OUT_RING(0);
210 	OUT_RING(0);
211 	OUT_RING(R600_SQ_TEX_VTX_VALID_BUFFER << 30);
212 	ADVANCE_RING();
213 
214 	if (((dev_priv->flags & RADEON_FAMILY_MASK) == CHIP_RV610) ||
215 	    ((dev_priv->flags & RADEON_FAMILY_MASK) == CHIP_RV620) ||
216 	    ((dev_priv->flags & RADEON_FAMILY_MASK) == CHIP_RS780) ||
217 	    ((dev_priv->flags & RADEON_FAMILY_MASK) == CHIP_RS880) ||
218 	    ((dev_priv->flags & RADEON_FAMILY_MASK) == CHIP_RV710))
219 		cp_set_surface_sync(dev_priv,
220 				    R600_TC_ACTION_ENA, 48, gpu_addr);
221 	else
222 		cp_set_surface_sync(dev_priv,
223 				    R600_VC_ACTION_ENA, 48, gpu_addr);
224 }
225 
226 static void
set_tex_resource(drm_radeon_private_t * dev_priv,int format,int w,int h,int pitch,u64 gpu_addr)227 set_tex_resource(drm_radeon_private_t *dev_priv,
228 		 int format, int w, int h, int pitch, u64 gpu_addr)
229 {
230 	uint32_t sq_tex_resource_word0, sq_tex_resource_word1, sq_tex_resource_word4;
231 	RING_LOCALS;
232 	DRM_DEBUG("\n");
233 
234 	if (h < 1)
235 		h = 1;
236 
237 	sq_tex_resource_word0 = (1 << 0);
238 	sq_tex_resource_word0 |= ((((pitch >> 3) - 1) << 8) |
239 				  ((w - 1) << 19));
240 
241 	sq_tex_resource_word1 = (format << 26);
242 	sq_tex_resource_word1 |= ((h - 1) << 0);
243 
244 	sq_tex_resource_word4 = ((1 << 14) |
245 				 (0 << 16) |
246 				 (1 << 19) |
247 				 (2 << 22) |
248 				 (3 << 25));
249 
250 	BEGIN_RING(9);
251 	OUT_RING(CP_PACKET3(R600_IT_SET_RESOURCE, 7));
252 	OUT_RING(0);
253 	OUT_RING(sq_tex_resource_word0);
254 	OUT_RING(sq_tex_resource_word1);
255 	OUT_RING(gpu_addr >> 8);
256 	OUT_RING(gpu_addr >> 8);
257 	OUT_RING(sq_tex_resource_word4);
258 	OUT_RING(0);
259 	OUT_RING(R600_SQ_TEX_VTX_VALID_TEXTURE << 30);
260 	ADVANCE_RING();
261 
262 }
263 
264 static void
set_scissors(drm_radeon_private_t * dev_priv,int x1,int y1,int x2,int y2)265 set_scissors(drm_radeon_private_t *dev_priv, int x1, int y1, int x2, int y2)
266 {
267 	RING_LOCALS;
268 	DRM_DEBUG("\n");
269 
270 	BEGIN_RING(12);
271 	OUT_RING(CP_PACKET3(R600_IT_SET_CONTEXT_REG, 2));
272 	OUT_RING((R600_PA_SC_SCREEN_SCISSOR_TL - R600_SET_CONTEXT_REG_OFFSET) >> 2);
273 	OUT_RING((x1 << 0) | (y1 << 16));
274 	OUT_RING((x2 << 0) | (y2 << 16));
275 
276 	OUT_RING(CP_PACKET3(R600_IT_SET_CONTEXT_REG, 2));
277 	OUT_RING((R600_PA_SC_GENERIC_SCISSOR_TL - R600_SET_CONTEXT_REG_OFFSET) >> 2);
278 	OUT_RING((x1 << 0) | (y1 << 16) | (1U << 31));
279 	OUT_RING((x2 << 0) | (y2 << 16));
280 
281 	OUT_RING(CP_PACKET3(R600_IT_SET_CONTEXT_REG, 2));
282 	OUT_RING((R600_PA_SC_WINDOW_SCISSOR_TL - R600_SET_CONTEXT_REG_OFFSET) >> 2);
283 	OUT_RING((x1 << 0) | (y1 << 16) | (1U << 31));
284 	OUT_RING((x2 << 0) | (y2 << 16));
285 	ADVANCE_RING();
286 }
287 
288 static void
draw_auto(drm_radeon_private_t * dev_priv)289 draw_auto(drm_radeon_private_t *dev_priv)
290 {
291 	RING_LOCALS;
292 	DRM_DEBUG("\n");
293 
294 	BEGIN_RING(10);
295 	OUT_RING(CP_PACKET3(R600_IT_SET_CONFIG_REG, 1));
296 	OUT_RING((R600_VGT_PRIMITIVE_TYPE - R600_SET_CONFIG_REG_OFFSET) >> 2);
297 	OUT_RING(DI_PT_RECTLIST);
298 
299 	OUT_RING(CP_PACKET3(R600_IT_INDEX_TYPE, 0));
300 #ifdef __BIG_ENDIAN
301 	OUT_RING((2 << 2) | DI_INDEX_SIZE_16_BIT);
302 #else
303 	OUT_RING(DI_INDEX_SIZE_16_BIT);
304 #endif
305 
306 	OUT_RING(CP_PACKET3(R600_IT_NUM_INSTANCES, 0));
307 	OUT_RING(1);
308 
309 	OUT_RING(CP_PACKET3(R600_IT_DRAW_INDEX_AUTO, 1));
310 	OUT_RING(3);
311 	OUT_RING(DI_SRC_SEL_AUTO_INDEX);
312 
313 	ADVANCE_RING();
314 	COMMIT_RING();
315 }
316 
317 static void
set_default_state(drm_radeon_private_t * dev_priv)318 set_default_state(drm_radeon_private_t *dev_priv)
319 {
320 	int i;
321 	u32 sq_config, sq_gpr_resource_mgmt_1, sq_gpr_resource_mgmt_2;
322 	u32 sq_thread_resource_mgmt, sq_stack_resource_mgmt_1, sq_stack_resource_mgmt_2;
323 	int num_ps_gprs, num_vs_gprs, num_temp_gprs, num_gs_gprs, num_es_gprs;
324 	int num_ps_threads, num_vs_threads, num_gs_threads, num_es_threads;
325 	int num_ps_stack_entries, num_vs_stack_entries, num_gs_stack_entries, num_es_stack_entries;
326 	RING_LOCALS;
327 
328 	switch ((dev_priv->flags & RADEON_FAMILY_MASK)) {
329 	case CHIP_R600:
330 		num_ps_gprs = 192;
331 		num_vs_gprs = 56;
332 		num_temp_gprs = 4;
333 		num_gs_gprs = 0;
334 		num_es_gprs = 0;
335 		num_ps_threads = 136;
336 		num_vs_threads = 48;
337 		num_gs_threads = 4;
338 		num_es_threads = 4;
339 		num_ps_stack_entries = 128;
340 		num_vs_stack_entries = 128;
341 		num_gs_stack_entries = 0;
342 		num_es_stack_entries = 0;
343 		break;
344 	case CHIP_RV630:
345 	case CHIP_RV635:
346 		num_ps_gprs = 84;
347 		num_vs_gprs = 36;
348 		num_temp_gprs = 4;
349 		num_gs_gprs = 0;
350 		num_es_gprs = 0;
351 		num_ps_threads = 144;
352 		num_vs_threads = 40;
353 		num_gs_threads = 4;
354 		num_es_threads = 4;
355 		num_ps_stack_entries = 40;
356 		num_vs_stack_entries = 40;
357 		num_gs_stack_entries = 32;
358 		num_es_stack_entries = 16;
359 		break;
360 	case CHIP_RV610:
361 	case CHIP_RV620:
362 	case CHIP_RS780:
363 	case CHIP_RS880:
364 	default:
365 		num_ps_gprs = 84;
366 		num_vs_gprs = 36;
367 		num_temp_gprs = 4;
368 		num_gs_gprs = 0;
369 		num_es_gprs = 0;
370 		num_ps_threads = 136;
371 		num_vs_threads = 48;
372 		num_gs_threads = 4;
373 		num_es_threads = 4;
374 		num_ps_stack_entries = 40;
375 		num_vs_stack_entries = 40;
376 		num_gs_stack_entries = 32;
377 		num_es_stack_entries = 16;
378 		break;
379 	case CHIP_RV670:
380 		num_ps_gprs = 144;
381 		num_vs_gprs = 40;
382 		num_temp_gprs = 4;
383 		num_gs_gprs = 0;
384 		num_es_gprs = 0;
385 		num_ps_threads = 136;
386 		num_vs_threads = 48;
387 		num_gs_threads = 4;
388 		num_es_threads = 4;
389 		num_ps_stack_entries = 40;
390 		num_vs_stack_entries = 40;
391 		num_gs_stack_entries = 32;
392 		num_es_stack_entries = 16;
393 		break;
394 	case CHIP_RV770:
395 		num_ps_gprs = 192;
396 		num_vs_gprs = 56;
397 		num_temp_gprs = 4;
398 		num_gs_gprs = 0;
399 		num_es_gprs = 0;
400 		num_ps_threads = 188;
401 		num_vs_threads = 60;
402 		num_gs_threads = 0;
403 		num_es_threads = 0;
404 		num_ps_stack_entries = 256;
405 		num_vs_stack_entries = 256;
406 		num_gs_stack_entries = 0;
407 		num_es_stack_entries = 0;
408 		break;
409 	case CHIP_RV730:
410 	case CHIP_RV740:
411 		num_ps_gprs = 84;
412 		num_vs_gprs = 36;
413 		num_temp_gprs = 4;
414 		num_gs_gprs = 0;
415 		num_es_gprs = 0;
416 		num_ps_threads = 188;
417 		num_vs_threads = 60;
418 		num_gs_threads = 0;
419 		num_es_threads = 0;
420 		num_ps_stack_entries = 128;
421 		num_vs_stack_entries = 128;
422 		num_gs_stack_entries = 0;
423 		num_es_stack_entries = 0;
424 		break;
425 	case CHIP_RV710:
426 		num_ps_gprs = 192;
427 		num_vs_gprs = 56;
428 		num_temp_gprs = 4;
429 		num_gs_gprs = 0;
430 		num_es_gprs = 0;
431 		num_ps_threads = 144;
432 		num_vs_threads = 48;
433 		num_gs_threads = 0;
434 		num_es_threads = 0;
435 		num_ps_stack_entries = 128;
436 		num_vs_stack_entries = 128;
437 		num_gs_stack_entries = 0;
438 		num_es_stack_entries = 0;
439 		break;
440 	}
441 
442 	if (((dev_priv->flags & RADEON_FAMILY_MASK) == CHIP_RV610) ||
443 	    ((dev_priv->flags & RADEON_FAMILY_MASK) == CHIP_RV620) ||
444 	    ((dev_priv->flags & RADEON_FAMILY_MASK) == CHIP_RS780) ||
445 	    ((dev_priv->flags & RADEON_FAMILY_MASK) == CHIP_RS880) ||
446 	    ((dev_priv->flags & RADEON_FAMILY_MASK) == CHIP_RV710))
447 		sq_config = 0;
448 	else
449 		sq_config = R600_VC_ENABLE;
450 
451 	sq_config |= (R600_DX9_CONSTS |
452 		      R600_ALU_INST_PREFER_VECTOR |
453 		      R600_PS_PRIO(0) |
454 		      R600_VS_PRIO(1) |
455 		      R600_GS_PRIO(2) |
456 		      R600_ES_PRIO(3));
457 
458 	sq_gpr_resource_mgmt_1 = (R600_NUM_PS_GPRS(num_ps_gprs) |
459 				  R600_NUM_VS_GPRS(num_vs_gprs) |
460 				  R600_NUM_CLAUSE_TEMP_GPRS(num_temp_gprs));
461 	sq_gpr_resource_mgmt_2 = (R600_NUM_GS_GPRS(num_gs_gprs) |
462 				  R600_NUM_ES_GPRS(num_es_gprs));
463 	sq_thread_resource_mgmt = (R600_NUM_PS_THREADS(num_ps_threads) |
464 				   R600_NUM_VS_THREADS(num_vs_threads) |
465 				   R600_NUM_GS_THREADS(num_gs_threads) |
466 				   R600_NUM_ES_THREADS(num_es_threads));
467 	sq_stack_resource_mgmt_1 = (R600_NUM_PS_STACK_ENTRIES(num_ps_stack_entries) |
468 				    R600_NUM_VS_STACK_ENTRIES(num_vs_stack_entries));
469 	sq_stack_resource_mgmt_2 = (R600_NUM_GS_STACK_ENTRIES(num_gs_stack_entries) |
470 				    R600_NUM_ES_STACK_ENTRIES(num_es_stack_entries));
471 
472 	if ((dev_priv->flags & RADEON_FAMILY_MASK) >= CHIP_RV770) {
473 		BEGIN_RING(r7xx_default_size + 10);
474 		for (i = 0; i < r7xx_default_size; i++)
475 			OUT_RING(r7xx_default_state[i]);
476 	} else {
477 		BEGIN_RING(r6xx_default_size + 10);
478 		for (i = 0; i < r6xx_default_size; i++)
479 			OUT_RING(r6xx_default_state[i]);
480 	}
481 	OUT_RING(CP_PACKET3(R600_IT_EVENT_WRITE, 0));
482 	OUT_RING(R600_CACHE_FLUSH_AND_INV_EVENT);
483 	/* SQ config */
484 	OUT_RING(CP_PACKET3(R600_IT_SET_CONFIG_REG, 6));
485 	OUT_RING((R600_SQ_CONFIG - R600_SET_CONFIG_REG_OFFSET) >> 2);
486 	OUT_RING(sq_config);
487 	OUT_RING(sq_gpr_resource_mgmt_1);
488 	OUT_RING(sq_gpr_resource_mgmt_2);
489 	OUT_RING(sq_thread_resource_mgmt);
490 	OUT_RING(sq_stack_resource_mgmt_1);
491 	OUT_RING(sq_stack_resource_mgmt_2);
492 	ADVANCE_RING();
493 }
494 
495 /* 23 bits of float fractional data */
496 #define I2F_FRAC_BITS  23
497 #define I2F_MASK ((1 << I2F_FRAC_BITS) - 1)
498 
499 /*
500  * Converts unsigned integer into 32-bit IEEE floating point representation.
501  * Will be exact from 0 to 2^24.  Above that, we round towards zero
502  * as the fractional bits will not fit in a float.  (It would be better to
503  * round towards even as the fpu does, but that is slower.)
504  */
int2float(uint32_t x)505 __pure uint32_t int2float(uint32_t x)
506 {
507 	uint32_t msb, exponent, fraction;
508 
509 	/* Zero is special */
510 	if (!x) return 0;
511 
512 	/* Get location of the most significant bit */
513 	msb = fls(x);
514 
515 	/*
516 	 * Use a rotate instead of a shift because that works both leftwards
517 	 * and rightwards due to the mod(32) behaviour.  This means we don't
518 	 * need to check to see if we are above 2^24 or not.
519 	 */
520 	fraction = ror32(x, (msb - I2F_FRAC_BITS) & 0x1f) & I2F_MASK;
521 	exponent = (127 + msb) << I2F_FRAC_BITS;
522 
523 	return fraction + exponent;
524 }
525 
r600_nomm_get_vb(struct drm_device * dev)526 static int r600_nomm_get_vb(struct drm_device *dev)
527 {
528 	drm_radeon_private_t *dev_priv = dev->dev_private;
529 	dev_priv->blit_vb = radeon_freelist_get(dev);
530 	if (!dev_priv->blit_vb) {
531 		DRM_ERROR("Unable to allocate vertex buffer for blit\n");
532 		return -EAGAIN;
533 	}
534 	return 0;
535 }
536 
r600_nomm_put_vb(struct drm_device * dev)537 static void r600_nomm_put_vb(struct drm_device *dev)
538 {
539 	drm_radeon_private_t *dev_priv = dev->dev_private;
540 
541 	dev_priv->blit_vb->used = 0;
542 	radeon_cp_discard_buffer(dev, dev_priv->blit_vb->file_priv->master, dev_priv->blit_vb);
543 }
544 
r600_nomm_get_vb_ptr(struct drm_device * dev)545 static void *r600_nomm_get_vb_ptr(struct drm_device *dev)
546 {
547 	drm_radeon_private_t *dev_priv = dev->dev_private;
548 	return (((char *)dev->agp_buffer_map->handle +
549 		 dev_priv->blit_vb->offset + dev_priv->blit_vb->used));
550 }
551 
552 int
r600_prepare_blit_copy(struct drm_device * dev,struct drm_file * file_priv)553 r600_prepare_blit_copy(struct drm_device *dev, struct drm_file *file_priv)
554 {
555 	drm_radeon_private_t *dev_priv = dev->dev_private;
556 	int ret;
557 	DRM_DEBUG("\n");
558 
559 	ret = r600_nomm_get_vb(dev);
560 	if (ret)
561 		return ret;
562 
563 	dev_priv->blit_vb->file_priv = file_priv;
564 
565 	set_default_state(dev_priv);
566 	set_shaders(dev);
567 
568 	return 0;
569 }
570 
571 
572 void
r600_done_blit_copy(struct drm_device * dev)573 r600_done_blit_copy(struct drm_device *dev)
574 {
575 	drm_radeon_private_t *dev_priv = dev->dev_private;
576 	RING_LOCALS;
577 	DRM_DEBUG("\n");
578 
579 	BEGIN_RING(5);
580 	OUT_RING(CP_PACKET3(R600_IT_EVENT_WRITE, 0));
581 	OUT_RING(R600_CACHE_FLUSH_AND_INV_EVENT);
582 	/* wait for 3D idle clean */
583 	OUT_RING(CP_PACKET3(R600_IT_SET_CONFIG_REG, 1));
584 	OUT_RING((R600_WAIT_UNTIL - R600_SET_CONFIG_REG_OFFSET) >> 2);
585 	OUT_RING(RADEON_WAIT_3D_IDLE | RADEON_WAIT_3D_IDLECLEAN);
586 
587 	ADVANCE_RING();
588 	COMMIT_RING();
589 
590 	r600_nomm_put_vb(dev);
591 }
592 
593 void
r600_blit_copy(struct drm_device * dev,uint64_t src_gpu_addr,uint64_t dst_gpu_addr,int size_bytes)594 r600_blit_copy(struct drm_device *dev,
595 	       uint64_t src_gpu_addr, uint64_t dst_gpu_addr,
596 	       int size_bytes)
597 {
598 	drm_radeon_private_t *dev_priv = dev->dev_private;
599 	int max_bytes;
600 	u64 vb_addr;
601 	u32 *vb;
602 
603 	vb = r600_nomm_get_vb_ptr(dev);
604 
605 	if ((size_bytes & 3) || (src_gpu_addr & 3) || (dst_gpu_addr & 3)) {
606 		max_bytes = 8192;
607 
608 		while (size_bytes) {
609 			int cur_size = size_bytes;
610 			int src_x = src_gpu_addr & 255;
611 			int dst_x = dst_gpu_addr & 255;
612 			int h = 1;
613 			src_gpu_addr = src_gpu_addr & ~255;
614 			dst_gpu_addr = dst_gpu_addr & ~255;
615 
616 			if (!src_x && !dst_x) {
617 				h = (cur_size / max_bytes);
618 				if (h > 8192)
619 					h = 8192;
620 				if (h == 0)
621 					h = 1;
622 				else
623 					cur_size = max_bytes;
624 			} else {
625 				if (cur_size > max_bytes)
626 					cur_size = max_bytes;
627 				if (cur_size > (max_bytes - dst_x))
628 					cur_size = (max_bytes - dst_x);
629 				if (cur_size > (max_bytes - src_x))
630 					cur_size = (max_bytes - src_x);
631 			}
632 
633 			if ((dev_priv->blit_vb->used + 48) > dev_priv->blit_vb->total) {
634 
635 				r600_nomm_put_vb(dev);
636 				r600_nomm_get_vb(dev);
637 				if (!dev_priv->blit_vb)
638 					return;
639 				set_shaders(dev);
640 				vb = r600_nomm_get_vb_ptr(dev);
641 			}
642 
643 			vb[0] = int2float(dst_x);
644 			vb[1] = 0;
645 			vb[2] = int2float(src_x);
646 			vb[3] = 0;
647 
648 			vb[4] = int2float(dst_x);
649 			vb[5] = int2float(h);
650 			vb[6] = int2float(src_x);
651 			vb[7] = int2float(h);
652 
653 			vb[8] = int2float(dst_x + cur_size);
654 			vb[9] = int2float(h);
655 			vb[10] = int2float(src_x + cur_size);
656 			vb[11] = int2float(h);
657 
658 			/* src */
659 			set_tex_resource(dev_priv, FMT_8,
660 					 src_x + cur_size, h, src_x + cur_size,
661 					 src_gpu_addr);
662 
663 			cp_set_surface_sync(dev_priv,
664 					    R600_TC_ACTION_ENA, (src_x + cur_size * h), src_gpu_addr);
665 
666 			/* dst */
667 			set_render_target(dev_priv, COLOR_8,
668 					  dst_x + cur_size, h,
669 					  dst_gpu_addr);
670 
671 			/* scissors */
672 			set_scissors(dev_priv, dst_x, 0, dst_x + cur_size, h);
673 
674 			/* Vertex buffer setup */
675 			vb_addr = dev_priv->gart_buffers_offset +
676 				dev_priv->blit_vb->offset +
677 				dev_priv->blit_vb->used;
678 			set_vtx_resource(dev_priv, vb_addr);
679 
680 			/* draw */
681 			draw_auto(dev_priv);
682 
683 			cp_set_surface_sync(dev_priv,
684 					    R600_CB_ACTION_ENA | R600_CB0_DEST_BASE_ENA,
685 					    cur_size * h, dst_gpu_addr);
686 
687 			vb += 12;
688 			dev_priv->blit_vb->used += 12 * 4;
689 
690 			src_gpu_addr += cur_size * h;
691 			dst_gpu_addr += cur_size * h;
692 			size_bytes -= cur_size * h;
693 		}
694 	} else {
695 		max_bytes = 8192 * 4;
696 
697 		while (size_bytes) {
698 			int cur_size = size_bytes;
699 			int src_x = (src_gpu_addr & 255);
700 			int dst_x = (dst_gpu_addr & 255);
701 			int h = 1;
702 			src_gpu_addr = src_gpu_addr & ~255;
703 			dst_gpu_addr = dst_gpu_addr & ~255;
704 
705 			if (!src_x && !dst_x) {
706 				h = (cur_size / max_bytes);
707 				if (h > 8192)
708 					h = 8192;
709 				if (h == 0)
710 					h = 1;
711 				else
712 					cur_size = max_bytes;
713 			} else {
714 				if (cur_size > max_bytes)
715 					cur_size = max_bytes;
716 				if (cur_size > (max_bytes - dst_x))
717 					cur_size = (max_bytes - dst_x);
718 				if (cur_size > (max_bytes - src_x))
719 					cur_size = (max_bytes - src_x);
720 			}
721 
722 			if ((dev_priv->blit_vb->used + 48) > dev_priv->blit_vb->total) {
723 				r600_nomm_put_vb(dev);
724 				r600_nomm_get_vb(dev);
725 				if (!dev_priv->blit_vb)
726 					return;
727 
728 				set_shaders(dev);
729 				vb = r600_nomm_get_vb_ptr(dev);
730 			}
731 
732 			vb[0] = int2float(dst_x / 4);
733 			vb[1] = 0;
734 			vb[2] = int2float(src_x / 4);
735 			vb[3] = 0;
736 
737 			vb[4] = int2float(dst_x / 4);
738 			vb[5] = int2float(h);
739 			vb[6] = int2float(src_x / 4);
740 			vb[7] = int2float(h);
741 
742 			vb[8] = int2float((dst_x + cur_size) / 4);
743 			vb[9] = int2float(h);
744 			vb[10] = int2float((src_x + cur_size) / 4);
745 			vb[11] = int2float(h);
746 
747 			/* src */
748 			set_tex_resource(dev_priv, FMT_8_8_8_8,
749 					 (src_x + cur_size) / 4,
750 					 h, (src_x + cur_size) / 4,
751 					 src_gpu_addr);
752 
753 			cp_set_surface_sync(dev_priv,
754 					    R600_TC_ACTION_ENA, (src_x + cur_size * h), src_gpu_addr);
755 
756 			/* dst */
757 			set_render_target(dev_priv, COLOR_8_8_8_8,
758 					  (dst_x + cur_size) / 4, h,
759 					  dst_gpu_addr);
760 
761 			/* scissors */
762 			set_scissors(dev_priv, (dst_x / 4), 0, (dst_x + cur_size / 4), h);
763 
764 			/* Vertex buffer setup */
765 			vb_addr = dev_priv->gart_buffers_offset +
766 				dev_priv->blit_vb->offset +
767 				dev_priv->blit_vb->used;
768 			set_vtx_resource(dev_priv, vb_addr);
769 
770 			/* draw */
771 			draw_auto(dev_priv);
772 
773 			cp_set_surface_sync(dev_priv,
774 					    R600_CB_ACTION_ENA | R600_CB0_DEST_BASE_ENA,
775 					    cur_size * h, dst_gpu_addr);
776 
777 			vb += 12;
778 			dev_priv->blit_vb->used += 12 * 4;
779 
780 			src_gpu_addr += cur_size * h;
781 			dst_gpu_addr += cur_size * h;
782 			size_bytes -= cur_size * h;
783 		}
784 	}
785 }
786 
787 void
r600_blit_swap(struct drm_device * dev,uint64_t src_gpu_addr,uint64_t dst_gpu_addr,int sx,int sy,int dx,int dy,int w,int h,int src_pitch,int dst_pitch,int cpp)788 r600_blit_swap(struct drm_device *dev,
789 	       uint64_t src_gpu_addr, uint64_t dst_gpu_addr,
790 	       int sx, int sy, int dx, int dy,
791 	       int w, int h, int src_pitch, int dst_pitch, int cpp)
792 {
793 	drm_radeon_private_t *dev_priv = dev->dev_private;
794 	int cb_format, tex_format;
795 	int sx2, sy2, dx2, dy2;
796 	u64 vb_addr;
797 	u32 *vb;
798 
799 	if ((dev_priv->blit_vb->used + 48) > dev_priv->blit_vb->total) {
800 
801 		r600_nomm_put_vb(dev);
802 		r600_nomm_get_vb(dev);
803 		if (!dev_priv->blit_vb)
804 			return;
805 
806 		set_shaders(dev);
807 	}
808 	vb = r600_nomm_get_vb_ptr(dev);
809 
810 	sx2 = sx + w;
811 	sy2 = sy + h;
812 	dx2 = dx + w;
813 	dy2 = dy + h;
814 
815 	vb[0] = int2float(dx);
816 	vb[1] = int2float(dy);
817 	vb[2] = int2float(sx);
818 	vb[3] = int2float(sy);
819 
820 	vb[4] = int2float(dx);
821 	vb[5] = int2float(dy2);
822 	vb[6] = int2float(sx);
823 	vb[7] = int2float(sy2);
824 
825 	vb[8] = int2float(dx2);
826 	vb[9] = int2float(dy2);
827 	vb[10] = int2float(sx2);
828 	vb[11] = int2float(sy2);
829 
830 	switch(cpp) {
831 	case 4:
832 		cb_format = COLOR_8_8_8_8;
833 		tex_format = FMT_8_8_8_8;
834 		break;
835 	case 2:
836 		cb_format = COLOR_5_6_5;
837 		tex_format = FMT_5_6_5;
838 		break;
839 	default:
840 		cb_format = COLOR_8;
841 		tex_format = FMT_8;
842 		break;
843 	}
844 
845 	/* src */
846 	set_tex_resource(dev_priv, tex_format,
847 			 src_pitch / cpp,
848 			 sy2, src_pitch / cpp,
849 			 src_gpu_addr);
850 
851 	cp_set_surface_sync(dev_priv,
852 			    R600_TC_ACTION_ENA, src_pitch * sy2, src_gpu_addr);
853 
854 	/* dst */
855 	set_render_target(dev_priv, cb_format,
856 			  dst_pitch / cpp, dy2,
857 			  dst_gpu_addr);
858 
859 	/* scissors */
860 	set_scissors(dev_priv, dx, dy, dx2, dy2);
861 
862 	/* Vertex buffer setup */
863 	vb_addr = dev_priv->gart_buffers_offset +
864 		dev_priv->blit_vb->offset +
865 		dev_priv->blit_vb->used;
866 	set_vtx_resource(dev_priv, vb_addr);
867 
868 	/* draw */
869 	draw_auto(dev_priv);
870 
871 	cp_set_surface_sync(dev_priv,
872 			    R600_CB_ACTION_ENA | R600_CB0_DEST_BASE_ENA,
873 			    dst_pitch * dy2, dst_gpu_addr);
874 
875 	dev_priv->blit_vb->used += 12 * 4;
876 }
877