1 | // SPDX-License-Identifier: GPL-2.0 |
2 | |
3 | /* |
4 | * ATI Mach64 Hardware Acceleration |
5 | */ |
6 | |
7 | #include <linux/delay.h> |
8 | #include <asm/unaligned.h> |
9 | #include <linux/fb.h> |
10 | #include <video/mach64.h> |
11 | #include "atyfb.h" |
12 | |
13 | /* |
14 | * Generic Mach64 routines |
15 | */ |
16 | |
17 | /* this is for DMA GUI engine! work in progress */ |
18 | typedef struct { |
19 | u32 frame_buf_offset; |
20 | u32 system_mem_addr; |
21 | u32 command; |
22 | u32 reserved; |
23 | } BM_DESCRIPTOR_ENTRY; |
24 | |
/*
 * Bit 31 flag. The constant must be unsigned: shifting a signed 1 into
 * the sign bit is undefined behaviour and would yield a negative value.
 */
#define LAST_DESCRIPTOR		(1U << 31)
#define SYSTEM_TO_FRAME_BUFFER	0
27 | |
28 | static u32 rotation24bpp(u32 dx, u32 direction) |
29 | { |
30 | u32 rotation; |
31 | if (direction & DST_X_LEFT_TO_RIGHT) { |
32 | rotation = (dx / 4) % 6; |
33 | } else { |
34 | rotation = ((dx + 2) / 4) % 6; |
35 | } |
36 | |
37 | return ((rotation << 8) | DST_24_ROTATION_ENABLE); |
38 | } |
39 | |
40 | void aty_reset_engine(struct atyfb_par *par) |
41 | { |
42 | /* reset engine */ |
43 | aty_st_le32(GEN_TEST_CNTL, |
44 | val: aty_ld_le32(GEN_TEST_CNTL, par) & |
45 | ~(GUI_ENGINE_ENABLE | HWCURSOR_ENABLE), par); |
46 | /* enable engine */ |
47 | aty_st_le32(GEN_TEST_CNTL, |
48 | val: aty_ld_le32(GEN_TEST_CNTL, par) | GUI_ENGINE_ENABLE, par); |
49 | /* ensure engine is not locked up by clearing any FIFO or */ |
50 | /* HOST errors */ |
51 | aty_st_le32(BUS_CNTL, |
52 | val: aty_ld_le32(BUS_CNTL, par) | BUS_HOST_ERR_ACK | BUS_FIFO_ERR_ACK, par); |
53 | |
54 | par->fifo_space = 0; |
55 | } |
56 | |
57 | static void reset_GTC_3D_engine(const struct atyfb_par *par) |
58 | { |
59 | aty_st_le32(SCALE_3D_CNTL, val: 0xc0, par); |
60 | mdelay(GTC_3D_RESET_DELAY); |
61 | aty_st_le32(SETUP_CNTL, val: 0x00, par); |
62 | mdelay(GTC_3D_RESET_DELAY); |
63 | aty_st_le32(SCALE_3D_CNTL, val: 0x00, par); |
64 | mdelay(GTC_3D_RESET_DELAY); |
65 | } |
66 | |
67 | void aty_init_engine(struct atyfb_par *par, struct fb_info *info) |
68 | { |
69 | u32 pitch_value; |
70 | u32 vxres; |
71 | |
72 | /* determine modal information from global mode structure */ |
73 | pitch_value = info->fix.line_length / (info->var.bits_per_pixel / 8); |
74 | vxres = info->var.xres_virtual; |
75 | |
76 | if (info->var.bits_per_pixel == 24) { |
77 | /* In 24 bpp, the engine is in 8 bpp - this requires that all */ |
78 | /* horizontal coordinates and widths must be adjusted */ |
79 | pitch_value *= 3; |
80 | vxres *= 3; |
81 | } |
82 | |
83 | /* On GTC (RagePro), we need to reset the 3D engine before */ |
84 | if (M64_HAS(RESET_3D)) |
85 | reset_GTC_3D_engine(par); |
86 | |
87 | /* Reset engine, enable, and clear any engine errors */ |
88 | aty_reset_engine(par); |
89 | /* Ensure that vga page pointers are set to zero - the upper */ |
90 | /* page pointers are set to 1 to handle overflows in the */ |
91 | /* lower page */ |
92 | aty_st_le32(MEM_VGA_WP_SEL, val: 0x00010000, par); |
93 | aty_st_le32(MEM_VGA_RP_SEL, val: 0x00010000, par); |
94 | |
95 | /* ---- Setup standard engine context ---- */ |
96 | |
97 | /* All GUI registers here are FIFOed - therefore, wait for */ |
98 | /* the appropriate number of empty FIFO entries */ |
99 | wait_for_fifo(entries: 14, par); |
100 | |
101 | /* enable all registers to be loaded for context loads */ |
102 | aty_st_le32(CONTEXT_MASK, val: 0xFFFFFFFF, par); |
103 | |
104 | /* set destination pitch to modal pitch, set offset to zero */ |
105 | aty_st_le32(DST_OFF_PITCH, val: (pitch_value / 8) << 22, par); |
106 | |
107 | /* zero these registers (set them to a known state) */ |
108 | aty_st_le32(DST_Y_X, val: 0, par); |
109 | aty_st_le32(DST_HEIGHT, val: 0, par); |
110 | aty_st_le32(DST_BRES_ERR, val: 0, par); |
111 | aty_st_le32(DST_BRES_INC, val: 0, par); |
112 | aty_st_le32(DST_BRES_DEC, val: 0, par); |
113 | |
114 | /* set destination drawing attributes */ |
115 | aty_st_le32(DST_CNTL, DST_LAST_PEL | DST_Y_TOP_TO_BOTTOM | |
116 | DST_X_LEFT_TO_RIGHT, par); |
117 | |
118 | /* set source pitch to modal pitch, set offset to zero */ |
119 | aty_st_le32(SRC_OFF_PITCH, val: (pitch_value / 8) << 22, par); |
120 | |
121 | /* set these registers to a known state */ |
122 | aty_st_le32(SRC_Y_X, val: 0, par); |
123 | aty_st_le32(SRC_HEIGHT1_WIDTH1, val: 1, par); |
124 | aty_st_le32(SRC_Y_X_START, val: 0, par); |
125 | aty_st_le32(SRC_HEIGHT2_WIDTH2, val: 1, par); |
126 | |
127 | /* set source pixel retrieving attributes */ |
128 | aty_st_le32(SRC_CNTL, SRC_LINE_X_LEFT_TO_RIGHT, par); |
129 | |
130 | /* set host attributes */ |
131 | wait_for_fifo(entries: 13, par); |
132 | aty_st_le32(HOST_CNTL, HOST_BYTE_ALIGN, par); |
133 | |
134 | /* set pattern attributes */ |
135 | aty_st_le32(PAT_REG0, val: 0, par); |
136 | aty_st_le32(PAT_REG1, val: 0, par); |
137 | aty_st_le32(PAT_CNTL, val: 0, par); |
138 | |
139 | /* set scissors to modal size */ |
140 | aty_st_le32(SC_LEFT, val: 0, par); |
141 | aty_st_le32(SC_TOP, val: 0, par); |
142 | aty_st_le32(SC_BOTTOM, val: par->crtc.vyres - 1, par); |
143 | aty_st_le32(SC_RIGHT, val: vxres - 1, par); |
144 | |
145 | /* set background color to minimum value (usually BLACK) */ |
146 | aty_st_le32(DP_BKGD_CLR, val: 0, par); |
147 | |
148 | /* set foreground color to maximum value (usually WHITE) */ |
149 | aty_st_le32(DP_FRGD_CLR, val: 0xFFFFFFFF, par); |
150 | |
151 | /* set write mask to effect all pixel bits */ |
152 | aty_st_le32(DP_WRITE_MASK, val: 0xFFFFFFFF, par); |
153 | |
154 | /* set foreground mix to overpaint and background mix to */ |
155 | /* no-effect */ |
156 | aty_st_le32(DP_MIX, FRGD_MIX_S | BKGD_MIX_D, par); |
157 | |
158 | /* set primary source pixel channel to foreground color */ |
159 | /* register */ |
160 | aty_st_le32(DP_SRC, FRGD_SRC_FRGD_CLR, par); |
161 | |
162 | /* set compare functionality to false (no-effect on */ |
163 | /* destination) */ |
164 | wait_for_fifo(entries: 3, par); |
165 | aty_st_le32(CLR_CMP_CLR, val: 0, par); |
166 | aty_st_le32(CLR_CMP_MASK, val: 0xFFFFFFFF, par); |
167 | aty_st_le32(CLR_CMP_CNTL, val: 0, par); |
168 | |
169 | /* set pixel depth */ |
170 | wait_for_fifo(entries: 2, par); |
171 | aty_st_le32(DP_PIX_WIDTH, val: par->crtc.dp_pix_width, par); |
172 | aty_st_le32(DP_CHAIN_MASK, val: par->crtc.dp_chain_mask, par); |
173 | |
174 | wait_for_fifo(entries: 5, par); |
175 | aty_st_le32(SCALE_3D_CNTL, val: 0, par); |
176 | aty_st_le32(Z_CNTL, val: 0, par); |
177 | aty_st_le32(CRTC_INT_CNTL, val: aty_ld_le32(CRTC_INT_CNTL, par) & ~0x20, |
178 | par); |
179 | aty_st_le32(GUI_TRAJ_CNTL, val: 0x100023, par); |
180 | |
181 | /* insure engine is idle before leaving */ |
182 | wait_for_idle(par); |
183 | } |
184 | |
185 | /* |
186 | * Accelerated functions |
187 | */ |
188 | |
189 | static inline void draw_rect(s16 x, s16 y, u16 width, u16 height, |
190 | struct atyfb_par *par) |
191 | { |
192 | /* perform rectangle fill */ |
193 | wait_for_fifo(entries: 2, par); |
194 | aty_st_le32(DST_Y_X, val: (x << 16) | y, par); |
195 | aty_st_le32(DST_HEIGHT_WIDTH, val: (width << 16) | height, par); |
196 | par->blitter_may_be_busy = 1; |
197 | } |
198 | |
199 | void atyfb_copyarea(struct fb_info *info, const struct fb_copyarea *area) |
200 | { |
201 | struct atyfb_par *par = (struct atyfb_par *) info->par; |
202 | u32 dy = area->dy, sy = area->sy, direction = DST_LAST_PEL; |
203 | u32 sx = area->sx, dx = area->dx, width = area->width, rotation = 0; |
204 | |
205 | if (par->asleep) |
206 | return; |
207 | if (!area->width || !area->height) |
208 | return; |
209 | if (!par->accel_flags) { |
210 | cfb_copyarea(info, area); |
211 | return; |
212 | } |
213 | |
214 | if (info->var.bits_per_pixel == 24) { |
215 | /* In 24 bpp, the engine is in 8 bpp - this requires that all */ |
216 | /* horizontal coordinates and widths must be adjusted */ |
217 | sx *= 3; |
218 | dx *= 3; |
219 | width *= 3; |
220 | } |
221 | |
222 | if (area->sy < area->dy) { |
223 | dy += area->height - 1; |
224 | sy += area->height - 1; |
225 | } else |
226 | direction |= DST_Y_TOP_TO_BOTTOM; |
227 | |
228 | if (sx < dx) { |
229 | dx += width - 1; |
230 | sx += width - 1; |
231 | } else |
232 | direction |= DST_X_LEFT_TO_RIGHT; |
233 | |
234 | if (info->var.bits_per_pixel == 24) { |
235 | rotation = rotation24bpp(dx, direction); |
236 | } |
237 | |
238 | wait_for_fifo(entries: 5, par); |
239 | aty_st_le32(DP_PIX_WIDTH, val: par->crtc.dp_pix_width, par); |
240 | aty_st_le32(DP_SRC, FRGD_SRC_BLIT, par); |
241 | aty_st_le32(SRC_Y_X, val: (sx << 16) | sy, par); |
242 | aty_st_le32(SRC_HEIGHT1_WIDTH1, val: (width << 16) | area->height, par); |
243 | aty_st_le32(DST_CNTL, val: direction | rotation, par); |
244 | draw_rect(x: dx, y: dy, width, height: area->height, par); |
245 | } |
246 | |
247 | void atyfb_fillrect(struct fb_info *info, const struct fb_fillrect *rect) |
248 | { |
249 | struct atyfb_par *par = (struct atyfb_par *) info->par; |
250 | u32 color, dx = rect->dx, width = rect->width, rotation = 0; |
251 | |
252 | if (par->asleep) |
253 | return; |
254 | if (!rect->width || !rect->height) |
255 | return; |
256 | if (!par->accel_flags) { |
257 | cfb_fillrect(info, rect); |
258 | return; |
259 | } |
260 | |
261 | if (info->fix.visual == FB_VISUAL_TRUECOLOR || |
262 | info->fix.visual == FB_VISUAL_DIRECTCOLOR) |
263 | color = ((u32 *)(info->pseudo_palette))[rect->color]; |
264 | else |
265 | color = rect->color; |
266 | |
267 | if (info->var.bits_per_pixel == 24) { |
268 | /* In 24 bpp, the engine is in 8 bpp - this requires that all */ |
269 | /* horizontal coordinates and widths must be adjusted */ |
270 | dx *= 3; |
271 | width *= 3; |
272 | rotation = rotation24bpp(dx, DST_X_LEFT_TO_RIGHT); |
273 | } |
274 | |
275 | wait_for_fifo(entries: 4, par); |
276 | aty_st_le32(DP_PIX_WIDTH, val: par->crtc.dp_pix_width, par); |
277 | aty_st_le32(DP_FRGD_CLR, val: color, par); |
278 | aty_st_le32(DP_SRC, |
279 | BKGD_SRC_BKGD_CLR | FRGD_SRC_FRGD_CLR | MONO_SRC_ONE, |
280 | par); |
281 | aty_st_le32(DST_CNTL, |
282 | DST_LAST_PEL | DST_Y_TOP_TO_BOTTOM | |
283 | DST_X_LEFT_TO_RIGHT | rotation, par); |
284 | draw_rect(x: dx, y: rect->dy, width, height: rect->height, par); |
285 | } |
286 | |
287 | void atyfb_imageblit(struct fb_info *info, const struct fb_image *image) |
288 | { |
289 | struct atyfb_par *par = (struct atyfb_par *) info->par; |
290 | u32 src_bytes, dx = image->dx, dy = image->dy, width = image->width; |
291 | u32 pix_width, rotation = 0, src, mix; |
292 | |
293 | if (par->asleep) |
294 | return; |
295 | if (!image->width || !image->height) |
296 | return; |
297 | if (!par->accel_flags || |
298 | (image->depth != 1 && info->var.bits_per_pixel != image->depth)) { |
299 | cfb_imageblit(info, image); |
300 | return; |
301 | } |
302 | |
303 | pix_width = par->crtc.dp_pix_width; |
304 | |
305 | switch (image->depth) { |
306 | case 1: |
307 | pix_width &= ~(BYTE_ORDER_MASK | HOST_MASK); |
308 | pix_width |= (BYTE_ORDER_MSB_TO_LSB | HOST_1BPP); |
309 | break; |
310 | case 4: |
311 | pix_width &= ~(BYTE_ORDER_MASK | HOST_MASK); |
312 | pix_width |= (BYTE_ORDER_MSB_TO_LSB | HOST_4BPP); |
313 | break; |
314 | case 8: |
315 | pix_width &= ~HOST_MASK; |
316 | pix_width |= HOST_8BPP; |
317 | break; |
318 | case 15: |
319 | pix_width &= ~HOST_MASK; |
320 | pix_width |= HOST_15BPP; |
321 | break; |
322 | case 16: |
323 | pix_width &= ~HOST_MASK; |
324 | pix_width |= HOST_16BPP; |
325 | break; |
326 | case 24: |
327 | pix_width &= ~HOST_MASK; |
328 | pix_width |= HOST_24BPP; |
329 | break; |
330 | case 32: |
331 | pix_width &= ~HOST_MASK; |
332 | pix_width |= HOST_32BPP; |
333 | break; |
334 | } |
335 | |
336 | if (info->var.bits_per_pixel == 24) { |
337 | /* In 24 bpp, the engine is in 8 bpp - this requires that all */ |
338 | /* horizontal coordinates and widths must be adjusted */ |
339 | dx *= 3; |
340 | width *= 3; |
341 | |
342 | rotation = rotation24bpp(dx, DST_X_LEFT_TO_RIGHT); |
343 | |
344 | pix_width &= ~DST_MASK; |
345 | pix_width |= DST_8BPP; |
346 | |
347 | /* |
348 | * since Rage 3D IIc we have DP_HOST_TRIPLE_EN bit |
349 | * this hwaccelerated triple has an issue with not aligned data |
350 | */ |
351 | if (image->depth == 1 && M64_HAS(HW_TRIPLE) && image->width % 8 == 0) |
352 | pix_width |= DP_HOST_TRIPLE_EN; |
353 | } |
354 | |
355 | if (image->depth == 1) { |
356 | u32 fg, bg; |
357 | if (info->fix.visual == FB_VISUAL_TRUECOLOR || |
358 | info->fix.visual == FB_VISUAL_DIRECTCOLOR) { |
359 | fg = ((u32*)(info->pseudo_palette))[image->fg_color]; |
360 | bg = ((u32*)(info->pseudo_palette))[image->bg_color]; |
361 | } else { |
362 | fg = image->fg_color; |
363 | bg = image->bg_color; |
364 | } |
365 | |
366 | wait_for_fifo(entries: 2, par); |
367 | aty_st_le32(DP_BKGD_CLR, val: bg, par); |
368 | aty_st_le32(DP_FRGD_CLR, val: fg, par); |
369 | src = MONO_SRC_HOST | FRGD_SRC_FRGD_CLR | BKGD_SRC_BKGD_CLR; |
370 | mix = FRGD_MIX_S | BKGD_MIX_S; |
371 | } else { |
372 | src = MONO_SRC_ONE | FRGD_SRC_HOST; |
373 | mix = FRGD_MIX_D_XOR_S | BKGD_MIX_D; |
374 | } |
375 | |
376 | wait_for_fifo(entries: 5, par); |
377 | aty_st_le32(DP_PIX_WIDTH, val: pix_width, par); |
378 | aty_st_le32(DP_MIX, val: mix, par); |
379 | aty_st_le32(DP_SRC, val: src, par); |
380 | aty_st_le32(HOST_CNTL, HOST_BYTE_ALIGN, par); |
381 | aty_st_le32(DST_CNTL, DST_Y_TOP_TO_BOTTOM | DST_X_LEFT_TO_RIGHT | rotation, par); |
382 | |
383 | draw_rect(x: dx, y: dy, width, height: image->height, par); |
384 | src_bytes = (((image->width * image->depth) + 7) / 8) * image->height; |
385 | |
386 | /* manual triple each pixel */ |
387 | if (image->depth == 1 && info->var.bits_per_pixel == 24 && !(pix_width & DP_HOST_TRIPLE_EN)) { |
388 | int inbit, outbit, mult24, byte_id_in_dword, width; |
389 | u8 *pbitmapin = (u8*)image->data, *pbitmapout; |
390 | u32 hostdword; |
391 | |
392 | for (width = image->width, inbit = 7, mult24 = 0; src_bytes; ) { |
393 | for (hostdword = 0, pbitmapout = (u8*)&hostdword, byte_id_in_dword = 0; |
394 | byte_id_in_dword < 4 && src_bytes; |
395 | byte_id_in_dword++, pbitmapout++) { |
396 | for (outbit = 7; outbit >= 0; outbit--) { |
397 | *pbitmapout |= (((*pbitmapin >> inbit) & 1) << outbit); |
398 | mult24++; |
399 | /* next bit */ |
400 | if (mult24 == 3) { |
401 | mult24 = 0; |
402 | inbit--; |
403 | width--; |
404 | } |
405 | |
406 | /* next byte */ |
407 | if (inbit < 0 || width == 0) { |
408 | src_bytes--; |
409 | pbitmapin++; |
410 | inbit = 7; |
411 | |
412 | if (width == 0) { |
413 | width = image->width; |
414 | outbit = 0; |
415 | } |
416 | } |
417 | } |
418 | } |
419 | wait_for_fifo(entries: 1, par); |
420 | aty_st_le32(HOST_DATA0, le32_to_cpu(hostdword), par); |
421 | } |
422 | } else { |
423 | u32 *pbitmap, dwords = (src_bytes + 3) / 4; |
424 | for (pbitmap = (u32*)(image->data); dwords; dwords--, pbitmap++) { |
425 | wait_for_fifo(entries: 1, par); |
426 | aty_st_le32(HOST_DATA0, val: get_unaligned_le32(p: pbitmap), par); |
427 | } |
428 | } |
429 | } |
430 | |