1 | // SPDX-License-Identifier: GPL-2.0 |
2 | /* |
3 | * Copyright 2019 NXP. |
4 | * |
5 | * Scaling algorithms were contributed by Dzung Hoang <dzung.hoang@nxp.com> |
6 | */ |
7 | |
8 | #include <linux/device.h> |
9 | #include <linux/slab.h> |
10 | |
11 | #include "dcss-dev.h" |
12 | |
/*
 * Scaler channel register map.
 *
 * Un-indented names are register offsets; dcss_scaler_write() adds them to
 * the channel's base offset. The indented names listed after a register are
 * the bit positions/masks that go with it (several of them are not referenced
 * in this file, so their grouping is taken from the ordering only).
 */
#define DCSS_SCALER_CTRL			0x00
#define   SCALER_EN				BIT(0)
#define   REPEAT_EN				BIT(4)
#define   SCALE2MEM_EN				BIT(8)
#define   MEM2OFIFO_EN				BIT(12)
#define DCSS_SCALER_OFIFO_CTRL			0x04
#define   OFIFO_LOW_THRES_POS			0
#define   OFIFO_LOW_THRES_MASK			GENMASK(9, 0)
#define   OFIFO_HIGH_THRES_POS			16
#define   OFIFO_HIGH_THRES_MASK			GENMASK(25, 16)
#define   UNDERRUN_DETECT_CLR			BIT(26)
#define   LOW_THRES_DETECT_CLR			BIT(27)
#define   HIGH_THRES_DETECT_CLR			BIT(28)
#define   UNDERRUN_DETECT_EN			BIT(29)
#define   LOW_THRES_DETECT_EN			BIT(30)
#define   HIGH_THRES_DETECT_EN			BIT(31)
/* the cached ch->sdata_ctrl value is written to this register on enable */
#define DCSS_SCALER_SDATA_CTRL			0x08
#define   YUV_EN				BIT(0)
#define   RTRAM_8LINES				BIT(1)
#define   Y_UV_BYTE_SWAP			BIT(4)
#define   A2R10G10B10_FORMAT_POS		8
#define   A2R10G10B10_FORMAT_MASK		GENMASK(11, 8)
#define DCSS_SCALER_BIT_DEPTH			0x0C
#define   LUM_BIT_DEPTH_POS			0
#define   LUM_BIT_DEPTH_MASK			GENMASK(1, 0)
#define   CHR_BIT_DEPTH_POS			4
#define   CHR_BIT_DEPTH_MASK			GENMASK(5, 4)
/* source/destination format registers take an enum buffer_format value */
#define DCSS_SCALER_SRC_FORMAT			0x10
#define DCSS_SCALER_DST_FORMAT			0x14
#define   FORMAT_MASK				GENMASK(1, 0)
/* resolution registers hold (height - 1) and (width - 1) */
#define DCSS_SCALER_SRC_LUM_RES			0x18
#define DCSS_SCALER_SRC_CHR_RES			0x1C
#define DCSS_SCALER_DST_LUM_RES			0x20
#define DCSS_SCALER_DST_CHR_RES			0x24
#define   WIDTH_POS				0
#define   WIDTH_MASK				GENMASK(11, 0)
#define   HEIGHT_POS				16
#define   HEIGHT_MASK				GENMASK(27, 16)
/* start phase and phase increment registers, luma then chroma */
#define DCSS_SCALER_V_LUM_START			0x48
#define   V_START_MASK				GENMASK(15, 0)
#define DCSS_SCALER_V_LUM_INC			0x4C
#define   V_INC_MASK				GENMASK(15, 0)
#define DCSS_SCALER_H_LUM_START			0x50
#define   H_START_MASK				GENMASK(18, 0)
#define DCSS_SCALER_H_LUM_INC			0x54
#define   H_INC_MASK				GENMASK(15, 0)
#define DCSS_SCALER_V_CHR_START			0x58
#define DCSS_SCALER_V_CHR_INC			0x5C
#define DCSS_SCALER_H_CHR_START			0x60
#define DCSS_SCALER_H_CHR_INC			0x64
/* base offsets of the filter coefficient banks */
#define DCSS_SCALER_COEF_VLUM			0x80
#define DCSS_SCALER_COEF_HLUM			0x140
#define DCSS_SCALER_COEF_VCHR			0x200
#define DCSS_SCALER_COEF_HCHR			0x300
67 | |
/**
 * struct dcss_scaler_ch - state of one scaler channel
 * @base_reg: channel registers as mapped by devm_ioremap()
 * @base_ofs: channel register base, used as the base for context loader
 *	      writes and as the address handed to devm_ioremap()
 * @scl: owning scaler
 * @sdata_ctrl: cached SDATA_CTRL value, written out when the channel is
 *		enabled
 * @scaler_ctrl: cached SCALER_CTRL value, written out by
 *		 dcss_scaler_write_sclctrl()
 * @scaler_ctrl_chgd: set when @scaler_ctrl changed and has not been written
 *		      out yet
 * @c_vstart: chroma vertical start phase saved by
 *	      dcss_scaler_fractions_set()
 * @c_hstart: chroma horizontal start phase saved by
 *	      dcss_scaler_fractions_set()
 * @use_nn_interpolation: generate nearest-neighbor coefficients instead of
 *			  Gaussian ones
 */
struct dcss_scaler_ch {
	void __iomem *base_reg;
	u32 base_ofs;
	struct dcss_scaler *scl;

	u32 sdata_ctrl;
	u32 scaler_ctrl;

	bool scaler_ctrl_chgd;

	u32 c_vstart;
	u32 c_hstart;

	bool use_nn_interpolation;
};
83 | |
/**
 * struct dcss_scaler - scaler block state
 * @dev: device used for managed allocations, mappings and error messages
 * @ctxld: context loader through which register writes are queued
 * @ctx_id: context loader context used for those writes
 * @ch: the scaler channels
 */
struct dcss_scaler {
	struct device *dev;

	struct dcss_ctxld *ctxld;
	u32 ctx_id;

	struct dcss_scaler_ch ch[3];
};
92 | |
/* scaler coefficients generator */
#define PSC_FRAC_BITS			30
#define PSC_FRAC_SCALE			BIT(PSC_FRAC_BITS)
/* a scan position splits into a tap index and a 4-bit phase (16 phases) */
#define PSC_BITS_FOR_PHASE		4
#define PSC_NUM_PHASES			16
/* rows of a coefficient table; the remaining phases are mirrored from these */
#define PSC_STORED_PHASES		(PSC_NUM_PHASES / 2 + 1)
/* columns of a coefficient table */
#define PSC_NUM_TAPS			7
/* taps generated when a 5-tap filter is requested */
#define PSC_NUM_TAPS_RGBA		5
/* each normalized phase sums to roughly 1 << PSC_COEFF_PRECISION */
#define PSC_COEFF_PRECISION		10
/* fractional bits of the chroma start phase offsets */
#define PSC_PHASE_FRACTION_BITS		13
#define PSC_PHASE_MASK			(PSC_NUM_PHASES - 1)
/* fractional bits of the fixed-point format used by mult_q() and div_q() */
#define PSC_Q_FRACTION			19
/* one half in that format; mult_q() adds it before shifting to round */
#define PSC_Q_ROUND_OFFSET		(1 << (PSC_Q_FRACTION - 1))
106 | |
107 | /** |
108 | * mult_q() - Performs fixed-point multiplication. |
109 | * @A: multiplier |
110 | * @B: multiplicand |
111 | */ |
112 | static int mult_q(int A, int B) |
113 | { |
114 | int result; |
115 | s64 temp; |
116 | |
117 | temp = (int64_t)A * (int64_t)B; |
118 | temp += PSC_Q_ROUND_OFFSET; |
119 | result = (int)(temp >> PSC_Q_FRACTION); |
120 | return result; |
121 | } |
122 | |
123 | /** |
124 | * div_q() - Performs fixed-point division. |
125 | * @A: dividend |
126 | * @B: divisor |
127 | */ |
128 | static int div_q(int A, int B) |
129 | { |
130 | int result; |
131 | s64 temp; |
132 | |
133 | temp = (int64_t)A << PSC_Q_FRACTION; |
134 | if ((temp >= 0 && B >= 0) || (temp < 0 && B < 0)) |
135 | temp += B / 2; |
136 | else |
137 | temp -= B / 2; |
138 | |
139 | result = (int)(temp / B); |
140 | return result; |
141 | } |
142 | |
143 | /** |
144 | * exp_approx_q() - Compute approximation to exp(x) function using Taylor |
145 | * series. |
146 | * @x: fixed-point argument of exp function |
147 | */ |
148 | static int exp_approx_q(int x) |
149 | { |
150 | int sum = 1 << PSC_Q_FRACTION; |
151 | int term = 1 << PSC_Q_FRACTION; |
152 | |
153 | term = mult_q(A: term, B: div_q(A: x, B: 1 << PSC_Q_FRACTION)); |
154 | sum += term; |
155 | term = mult_q(A: term, B: div_q(A: x, B: 2 << PSC_Q_FRACTION)); |
156 | sum += term; |
157 | term = mult_q(A: term, B: div_q(A: x, B: 3 << PSC_Q_FRACTION)); |
158 | sum += term; |
159 | term = mult_q(A: term, B: div_q(A: x, B: 4 << PSC_Q_FRACTION)); |
160 | sum += term; |
161 | |
162 | return sum; |
163 | } |
164 | |
165 | /** |
166 | * dcss_scaler_gaussian_filter() - Generate gaussian prototype filter. |
167 | * @fc_q: fixed-point cutoff frequency normalized to range [0, 1] |
168 | * @use_5_taps: indicates whether to use 5 taps or 7 taps |
169 | * @coef: output filter coefficients |
170 | */ |
171 | static void dcss_scaler_gaussian_filter(int fc_q, bool use_5_taps, |
172 | bool phase0_identity, |
173 | int coef[][PSC_NUM_TAPS]) |
174 | { |
175 | int sigma_q, g0_q, g1_q, g2_q; |
176 | int tap_cnt1, tap_cnt2, tap_idx, phase_cnt; |
177 | int mid; |
178 | int phase; |
179 | int i; |
180 | int taps; |
181 | |
182 | if (use_5_taps) |
183 | for (phase = 0; phase < PSC_STORED_PHASES; phase++) { |
184 | coef[phase][0] = 0; |
185 | coef[phase][PSC_NUM_TAPS - 1] = 0; |
186 | } |
187 | |
188 | /* seed coefficient scanner */ |
189 | taps = use_5_taps ? PSC_NUM_TAPS_RGBA : PSC_NUM_TAPS; |
190 | mid = (PSC_NUM_PHASES * taps) / 2 - 1; |
191 | phase_cnt = (PSC_NUM_PHASES * (PSC_NUM_TAPS + 1)) / 2; |
192 | tap_cnt1 = (PSC_NUM_PHASES * PSC_NUM_TAPS) / 2; |
193 | tap_cnt2 = (PSC_NUM_PHASES * PSC_NUM_TAPS) / 2; |
194 | |
195 | /* seed gaussian filter generator */ |
196 | sigma_q = div_q(PSC_Q_ROUND_OFFSET, B: fc_q); |
197 | g0_q = 1 << PSC_Q_FRACTION; |
198 | g1_q = exp_approx_q(x: div_q(A: -PSC_Q_ROUND_OFFSET, |
199 | B: mult_q(A: sigma_q, B: sigma_q))); |
200 | g2_q = mult_q(A: g1_q, B: g1_q); |
201 | coef[phase_cnt & PSC_PHASE_MASK][tap_cnt1 >> PSC_BITS_FOR_PHASE] = g0_q; |
202 | |
203 | for (i = 0; i < mid; i++) { |
204 | phase_cnt++; |
205 | tap_cnt1--; |
206 | tap_cnt2++; |
207 | |
208 | g0_q = mult_q(A: g0_q, B: g1_q); |
209 | g1_q = mult_q(A: g1_q, B: g2_q); |
210 | |
211 | if ((phase_cnt & PSC_PHASE_MASK) <= 8) { |
212 | tap_idx = tap_cnt1 >> PSC_BITS_FOR_PHASE; |
213 | coef[phase_cnt & PSC_PHASE_MASK][tap_idx] = g0_q; |
214 | } |
215 | if (((-phase_cnt) & PSC_PHASE_MASK) <= 8) { |
216 | tap_idx = tap_cnt2 >> PSC_BITS_FOR_PHASE; |
217 | coef[(-phase_cnt) & PSC_PHASE_MASK][tap_idx] = g0_q; |
218 | } |
219 | } |
220 | |
221 | phase_cnt++; |
222 | tap_cnt1--; |
223 | coef[phase_cnt & PSC_PHASE_MASK][tap_cnt1 >> PSC_BITS_FOR_PHASE] = 0; |
224 | |
225 | /* override phase 0 with identity filter if specified */ |
226 | if (phase0_identity) |
227 | for (i = 0; i < PSC_NUM_TAPS; i++) |
228 | coef[0][i] = i == (PSC_NUM_TAPS >> 1) ? |
229 | (1 << PSC_COEFF_PRECISION) : 0; |
230 | |
231 | /* normalize coef */ |
232 | for (phase = 0; phase < PSC_STORED_PHASES; phase++) { |
233 | int sum = 0; |
234 | s64 ll_temp; |
235 | |
236 | for (i = 0; i < PSC_NUM_TAPS; i++) |
237 | sum += coef[phase][i]; |
238 | for (i = 0; i < PSC_NUM_TAPS; i++) { |
239 | ll_temp = coef[phase][i]; |
240 | ll_temp <<= PSC_COEFF_PRECISION; |
241 | ll_temp += sum >> 1; |
242 | ll_temp /= sum; |
243 | coef[phase][i] = (int)ll_temp; |
244 | } |
245 | } |
246 | } |
247 | |
248 | static void dcss_scaler_nearest_neighbor_filter(bool use_5_taps, |
249 | int coef[][PSC_NUM_TAPS]) |
250 | { |
251 | int i, j; |
252 | |
253 | for (i = 0; i < PSC_STORED_PHASES; i++) |
254 | for (j = 0; j < PSC_NUM_TAPS; j++) |
255 | coef[i][j] = j == PSC_NUM_TAPS >> 1 ? |
256 | (1 << PSC_COEFF_PRECISION) : 0; |
257 | } |
258 | |
259 | /** |
260 | * dcss_scaler_filter_design() - Compute filter coefficients using |
261 | * Gaussian filter. |
262 | * @src_length: length of input |
263 | * @dst_length: length of output |
264 | * @use_5_taps: 0 for 7 taps per phase, 1 for 5 taps |
265 | * @coef: output coefficients |
266 | */ |
267 | static void dcss_scaler_filter_design(int src_length, int dst_length, |
268 | bool use_5_taps, bool phase0_identity, |
269 | int coef[][PSC_NUM_TAPS], |
270 | bool nn_interpolation) |
271 | { |
272 | int fc_q; |
273 | |
274 | /* compute cutoff frequency */ |
275 | if (dst_length >= src_length) |
276 | fc_q = div_q(A: 1, PSC_NUM_PHASES); |
277 | else |
278 | fc_q = div_q(A: dst_length, B: src_length * PSC_NUM_PHASES); |
279 | |
280 | if (nn_interpolation) |
281 | dcss_scaler_nearest_neighbor_filter(use_5_taps, coef); |
282 | else |
283 | /* compute gaussian filter coefficients */ |
284 | dcss_scaler_gaussian_filter(fc_q, use_5_taps, phase0_identity, coef); |
285 | } |
286 | |
287 | static void dcss_scaler_write(struct dcss_scaler_ch *ch, u32 val, u32 ofs) |
288 | { |
289 | struct dcss_scaler *scl = ch->scl; |
290 | |
291 | dcss_ctxld_write(ctxld: scl->ctxld, ctx_id: scl->ctx_id, val, reg_idx: ch->base_ofs + ofs); |
292 | } |
293 | |
294 | static int dcss_scaler_ch_init_all(struct dcss_scaler *scl, |
295 | unsigned long scaler_base) |
296 | { |
297 | struct dcss_scaler_ch *ch; |
298 | int i; |
299 | |
300 | for (i = 0; i < 3; i++) { |
301 | ch = &scl->ch[i]; |
302 | |
303 | ch->base_ofs = scaler_base + i * 0x400; |
304 | |
305 | ch->base_reg = devm_ioremap(dev: scl->dev, offset: ch->base_ofs, SZ_4K); |
306 | if (!ch->base_reg) { |
307 | dev_err(scl->dev, "scaler: unable to remap ch base\n" ); |
308 | return -ENOMEM; |
309 | } |
310 | |
311 | ch->scl = scl; |
312 | } |
313 | |
314 | return 0; |
315 | } |
316 | |
317 | int dcss_scaler_init(struct dcss_dev *dcss, unsigned long scaler_base) |
318 | { |
319 | struct dcss_scaler *scaler; |
320 | |
321 | scaler = devm_kzalloc(dev: dcss->dev, size: sizeof(*scaler), GFP_KERNEL); |
322 | if (!scaler) |
323 | return -ENOMEM; |
324 | |
325 | dcss->scaler = scaler; |
326 | scaler->dev = dcss->dev; |
327 | scaler->ctxld = dcss->ctxld; |
328 | scaler->ctx_id = CTX_SB_HP; |
329 | |
330 | if (dcss_scaler_ch_init_all(scl: scaler, scaler_base)) |
331 | return -ENOMEM; |
332 | |
333 | return 0; |
334 | } |
335 | |
336 | void dcss_scaler_exit(struct dcss_scaler *scl) |
337 | { |
338 | int ch_no; |
339 | |
340 | for (ch_no = 0; ch_no < 3; ch_no++) { |
341 | struct dcss_scaler_ch *ch = &scl->ch[ch_no]; |
342 | |
343 | dcss_writel(0, ch->base_reg + DCSS_SCALER_CTRL); |
344 | } |
345 | } |
346 | |
347 | void dcss_scaler_ch_enable(struct dcss_scaler *scl, int ch_num, bool en) |
348 | { |
349 | struct dcss_scaler_ch *ch = &scl->ch[ch_num]; |
350 | u32 scaler_ctrl; |
351 | |
352 | scaler_ctrl = en ? SCALER_EN | REPEAT_EN : 0; |
353 | |
354 | if (en) |
355 | dcss_scaler_write(ch, val: ch->sdata_ctrl, DCSS_SCALER_SDATA_CTRL); |
356 | |
357 | if (ch->scaler_ctrl != scaler_ctrl) |
358 | ch->scaler_ctrl_chgd = true; |
359 | |
360 | ch->scaler_ctrl = scaler_ctrl; |
361 | } |
362 | |
363 | static void dcss_scaler_yuv_enable(struct dcss_scaler_ch *ch, bool en) |
364 | { |
365 | ch->sdata_ctrl &= ~YUV_EN; |
366 | ch->sdata_ctrl |= en ? YUV_EN : 0; |
367 | } |
368 | |
369 | static void dcss_scaler_rtr_8lines_enable(struct dcss_scaler_ch *ch, bool en) |
370 | { |
371 | ch->sdata_ctrl &= ~RTRAM_8LINES; |
372 | ch->sdata_ctrl |= en ? RTRAM_8LINES : 0; |
373 | } |
374 | |
375 | static void dcss_scaler_bit_depth_set(struct dcss_scaler_ch *ch, int depth) |
376 | { |
377 | u32 val; |
378 | |
379 | val = depth == 30 ? 2 : 0; |
380 | |
381 | dcss_scaler_write(ch, |
382 | val: ((val << CHR_BIT_DEPTH_POS) & CHR_BIT_DEPTH_MASK) | |
383 | ((val << LUM_BIT_DEPTH_POS) & LUM_BIT_DEPTH_MASK), |
384 | DCSS_SCALER_BIT_DEPTH); |
385 | } |
386 | |
/*
 * Buffer layouts known to the scaler. The enumerator values are written
 * as-is to the source and destination format registers.
 */
enum buffer_format {
	BUF_FMT_YUV420,
	BUF_FMT_YUV422,
	BUF_FMT_ARGB8888_YUV444,
};
392 | |
/*
 * Position of the chroma samples relative to luma for 4:2:0 sources.
 * dcss_scaler_fractions_set() shifts the chroma start phase by the
 * horizontal and vertical fractions named in each enumerator.
 */
enum chroma_location {
	PSC_LOC_HORZ_0_VERT_1_OVER_4 = 0,
	PSC_LOC_HORZ_1_OVER_4_VERT_1_OVER_4 = 1,
	PSC_LOC_HORZ_0_VERT_0 = 2,
	PSC_LOC_HORZ_1_OVER_4_VERT_0 = 3,
	PSC_LOC_HORZ_0_VERT_1_OVER_2 = 4,
	PSC_LOC_HORZ_1_OVER_4_VERT_1_OVER_2 = 5
};
401 | |
402 | static void dcss_scaler_format_set(struct dcss_scaler_ch *ch, |
403 | enum buffer_format src_fmt, |
404 | enum buffer_format dst_fmt) |
405 | { |
406 | dcss_scaler_write(ch, val: src_fmt, DCSS_SCALER_SRC_FORMAT); |
407 | dcss_scaler_write(ch, val: dst_fmt, DCSS_SCALER_DST_FORMAT); |
408 | } |
409 | |
410 | static void dcss_scaler_res_set(struct dcss_scaler_ch *ch, |
411 | int src_xres, int src_yres, |
412 | int dst_xres, int dst_yres, |
413 | u32 pix_format, enum buffer_format dst_format) |
414 | { |
415 | u32 lsrc_xres, lsrc_yres, csrc_xres, csrc_yres; |
416 | u32 ldst_xres, ldst_yres, cdst_xres, cdst_yres; |
417 | bool src_is_444 = true; |
418 | |
419 | lsrc_xres = src_xres; |
420 | csrc_xres = src_xres; |
421 | lsrc_yres = src_yres; |
422 | csrc_yres = src_yres; |
423 | ldst_xres = dst_xres; |
424 | cdst_xres = dst_xres; |
425 | ldst_yres = dst_yres; |
426 | cdst_yres = dst_yres; |
427 | |
428 | if (pix_format == DRM_FORMAT_UYVY || pix_format == DRM_FORMAT_VYUY || |
429 | pix_format == DRM_FORMAT_YUYV || pix_format == DRM_FORMAT_YVYU) { |
430 | csrc_xres >>= 1; |
431 | src_is_444 = false; |
432 | } else if (pix_format == DRM_FORMAT_NV12 || |
433 | pix_format == DRM_FORMAT_NV21) { |
434 | csrc_xres >>= 1; |
435 | csrc_yres >>= 1; |
436 | src_is_444 = false; |
437 | } |
438 | |
439 | if (dst_format == BUF_FMT_YUV422) |
440 | cdst_xres >>= 1; |
441 | |
442 | /* for 4:4:4 to 4:2:2 conversion, source height should be 1 less */ |
443 | if (src_is_444 && dst_format == BUF_FMT_YUV422) { |
444 | lsrc_yres--; |
445 | csrc_yres--; |
446 | } |
447 | |
448 | dcss_scaler_write(ch, val: (((lsrc_yres - 1) << HEIGHT_POS) & HEIGHT_MASK) | |
449 | (((lsrc_xres - 1) << WIDTH_POS) & WIDTH_MASK), |
450 | DCSS_SCALER_SRC_LUM_RES); |
451 | dcss_scaler_write(ch, val: (((csrc_yres - 1) << HEIGHT_POS) & HEIGHT_MASK) | |
452 | (((csrc_xres - 1) << WIDTH_POS) & WIDTH_MASK), |
453 | DCSS_SCALER_SRC_CHR_RES); |
454 | dcss_scaler_write(ch, val: (((ldst_yres - 1) << HEIGHT_POS) & HEIGHT_MASK) | |
455 | (((ldst_xres - 1) << WIDTH_POS) & WIDTH_MASK), |
456 | DCSS_SCALER_DST_LUM_RES); |
457 | dcss_scaler_write(ch, val: (((cdst_yres - 1) << HEIGHT_POS) & HEIGHT_MASK) | |
458 | (((cdst_xres - 1) << WIDTH_POS) & WIDTH_MASK), |
459 | DCSS_SCALER_DST_CHR_RES); |
460 | } |
461 | |
/*
 * Convert an integer scale factor into a fixed-point ratio with @fp_pos
 * fractional bits: the factor itself for downscaling, its reciprocal for
 * upscaling.
 */
#define downscale_fp(factor, fp_pos)		((factor) << (fp_pos))
#define upscale_fp(factor, fp_pos)		((1 << (fp_pos)) / (factor))

/* integer downscale and upscale factors of one channel */
struct dcss_scaler_factors {
	int downscale;
	int upscale;
};

/* one {downscale, upscale} entry per channel, indexed by channel number */
static const struct dcss_scaler_factors dcss_scaler_factors[] = {
	{3, 8}, {5, 8}, {5, 8},
};
473 | |
474 | static void dcss_scaler_fractions_set(struct dcss_scaler_ch *ch, |
475 | int src_xres, int src_yres, |
476 | int dst_xres, int dst_yres, |
477 | u32 src_format, u32 dst_format, |
478 | enum chroma_location src_chroma_loc) |
479 | { |
480 | int src_c_xres, src_c_yres, dst_c_xres, dst_c_yres; |
481 | u32 l_vinc, l_hinc, c_vinc, c_hinc; |
482 | u32 c_vstart, c_hstart; |
483 | |
484 | src_c_xres = src_xres; |
485 | src_c_yres = src_yres; |
486 | dst_c_xres = dst_xres; |
487 | dst_c_yres = dst_yres; |
488 | |
489 | c_vstart = 0; |
490 | c_hstart = 0; |
491 | |
492 | /* adjustments for source chroma location */ |
493 | if (src_format == BUF_FMT_YUV420) { |
494 | /* vertical input chroma position adjustment */ |
495 | switch (src_chroma_loc) { |
496 | case PSC_LOC_HORZ_0_VERT_1_OVER_4: |
497 | case PSC_LOC_HORZ_1_OVER_4_VERT_1_OVER_4: |
498 | /* |
499 | * move chroma up to first luma line |
500 | * (1/4 chroma input line spacing) |
501 | */ |
502 | c_vstart -= (1 << (PSC_PHASE_FRACTION_BITS - 2)); |
503 | break; |
504 | case PSC_LOC_HORZ_0_VERT_1_OVER_2: |
505 | case PSC_LOC_HORZ_1_OVER_4_VERT_1_OVER_2: |
506 | /* |
507 | * move chroma up to first luma line |
508 | * (1/2 chroma input line spacing) |
509 | */ |
510 | c_vstart -= (1 << (PSC_PHASE_FRACTION_BITS - 1)); |
511 | break; |
512 | default: |
513 | break; |
514 | } |
515 | /* horizontal input chroma position adjustment */ |
516 | switch (src_chroma_loc) { |
517 | case PSC_LOC_HORZ_1_OVER_4_VERT_1_OVER_4: |
518 | case PSC_LOC_HORZ_1_OVER_4_VERT_0: |
519 | case PSC_LOC_HORZ_1_OVER_4_VERT_1_OVER_2: |
520 | /* move chroma left 1/4 chroma input sample spacing */ |
521 | c_hstart -= (1 << (PSC_PHASE_FRACTION_BITS - 2)); |
522 | break; |
523 | default: |
524 | break; |
525 | } |
526 | } |
527 | |
528 | /* adjustments to chroma resolution */ |
529 | if (src_format == BUF_FMT_YUV420) { |
530 | src_c_xres >>= 1; |
531 | src_c_yres >>= 1; |
532 | } else if (src_format == BUF_FMT_YUV422) { |
533 | src_c_xres >>= 1; |
534 | } |
535 | |
536 | if (dst_format == BUF_FMT_YUV422) |
537 | dst_c_xres >>= 1; |
538 | |
539 | l_vinc = ((src_yres << 13) + (dst_yres >> 1)) / dst_yres; |
540 | c_vinc = ((src_c_yres << 13) + (dst_c_yres >> 1)) / dst_c_yres; |
541 | l_hinc = ((src_xres << 13) + (dst_xres >> 1)) / dst_xres; |
542 | c_hinc = ((src_c_xres << 13) + (dst_c_xres >> 1)) / dst_c_xres; |
543 | |
544 | /* save chroma start phase */ |
545 | ch->c_vstart = c_vstart; |
546 | ch->c_hstart = c_hstart; |
547 | |
548 | dcss_scaler_write(ch, val: 0, DCSS_SCALER_V_LUM_START); |
549 | dcss_scaler_write(ch, val: l_vinc, DCSS_SCALER_V_LUM_INC); |
550 | |
551 | dcss_scaler_write(ch, val: 0, DCSS_SCALER_H_LUM_START); |
552 | dcss_scaler_write(ch, val: l_hinc, DCSS_SCALER_H_LUM_INC); |
553 | |
554 | dcss_scaler_write(ch, val: c_vstart, DCSS_SCALER_V_CHR_START); |
555 | dcss_scaler_write(ch, val: c_vinc, DCSS_SCALER_V_CHR_INC); |
556 | |
557 | dcss_scaler_write(ch, val: c_hstart, DCSS_SCALER_H_CHR_START); |
558 | dcss_scaler_write(ch, val: c_hinc, DCSS_SCALER_H_CHR_INC); |
559 | } |
560 | |
561 | int dcss_scaler_get_min_max_ratios(struct dcss_scaler *scl, int ch_num, |
562 | int *min, int *max) |
563 | { |
564 | *min = upscale_fp(dcss_scaler_factors[ch_num].upscale, 16); |
565 | *max = downscale_fp(dcss_scaler_factors[ch_num].downscale, 16); |
566 | |
567 | return 0; |
568 | } |
569 | |
570 | static void dcss_scaler_program_5_coef_set(struct dcss_scaler_ch *ch, |
571 | int base_addr, |
572 | int coef[][PSC_NUM_TAPS]) |
573 | { |
574 | int i, phase; |
575 | |
576 | for (i = 0; i < PSC_STORED_PHASES; i++) { |
577 | dcss_scaler_write(ch, val: ((coef[i][1] & 0xfff) << 16 | |
578 | (coef[i][2] & 0xfff) << 4 | |
579 | (coef[i][3] & 0xf00) >> 8), |
580 | ofs: base_addr + i * sizeof(u32)); |
581 | dcss_scaler_write(ch, val: ((coef[i][3] & 0x0ff) << 20 | |
582 | (coef[i][4] & 0xfff) << 8 | |
583 | (coef[i][5] & 0xff0) >> 4), |
584 | ofs: base_addr + 0x40 + i * sizeof(u32)); |
585 | dcss_scaler_write(ch, val: ((coef[i][5] & 0x00f) << 24), |
586 | ofs: base_addr + 0x80 + i * sizeof(u32)); |
587 | } |
588 | |
589 | /* reverse both phase and tap orderings */ |
590 | for (phase = (PSC_NUM_PHASES >> 1) - 1; |
591 | i < PSC_NUM_PHASES; i++, phase--) { |
592 | dcss_scaler_write(ch, val: ((coef[phase][5] & 0xfff) << 16 | |
593 | (coef[phase][4] & 0xfff) << 4 | |
594 | (coef[phase][3] & 0xf00) >> 8), |
595 | ofs: base_addr + i * sizeof(u32)); |
596 | dcss_scaler_write(ch, val: ((coef[phase][3] & 0x0ff) << 20 | |
597 | (coef[phase][2] & 0xfff) << 8 | |
598 | (coef[phase][1] & 0xff0) >> 4), |
599 | ofs: base_addr + 0x40 + i * sizeof(u32)); |
600 | dcss_scaler_write(ch, val: ((coef[phase][1] & 0x00f) << 24), |
601 | ofs: base_addr + 0x80 + i * sizeof(u32)); |
602 | } |
603 | } |
604 | |
605 | static void dcss_scaler_program_7_coef_set(struct dcss_scaler_ch *ch, |
606 | int base_addr, |
607 | int coef[][PSC_NUM_TAPS]) |
608 | { |
609 | int i, phase; |
610 | |
611 | for (i = 0; i < PSC_STORED_PHASES; i++) { |
612 | dcss_scaler_write(ch, val: ((coef[i][0] & 0xfff) << 16 | |
613 | (coef[i][1] & 0xfff) << 4 | |
614 | (coef[i][2] & 0xf00) >> 8), |
615 | ofs: base_addr + i * sizeof(u32)); |
616 | dcss_scaler_write(ch, val: ((coef[i][2] & 0x0ff) << 20 | |
617 | (coef[i][3] & 0xfff) << 8 | |
618 | (coef[i][4] & 0xff0) >> 4), |
619 | ofs: base_addr + 0x40 + i * sizeof(u32)); |
620 | dcss_scaler_write(ch, val: ((coef[i][4] & 0x00f) << 24 | |
621 | (coef[i][5] & 0xfff) << 12 | |
622 | (coef[i][6] & 0xfff)), |
623 | ofs: base_addr + 0x80 + i * sizeof(u32)); |
624 | } |
625 | |
626 | /* reverse both phase and tap orderings */ |
627 | for (phase = (PSC_NUM_PHASES >> 1) - 1; |
628 | i < PSC_NUM_PHASES; i++, phase--) { |
629 | dcss_scaler_write(ch, val: ((coef[phase][6] & 0xfff) << 16 | |
630 | (coef[phase][5] & 0xfff) << 4 | |
631 | (coef[phase][4] & 0xf00) >> 8), |
632 | ofs: base_addr + i * sizeof(u32)); |
633 | dcss_scaler_write(ch, val: ((coef[phase][4] & 0x0ff) << 20 | |
634 | (coef[phase][3] & 0xfff) << 8 | |
635 | (coef[phase][2] & 0xff0) >> 4), |
636 | ofs: base_addr + 0x40 + i * sizeof(u32)); |
637 | dcss_scaler_write(ch, val: ((coef[phase][2] & 0x00f) << 24 | |
638 | (coef[phase][1] & 0xfff) << 12 | |
639 | (coef[phase][0] & 0xfff)), |
640 | ofs: base_addr + 0x80 + i * sizeof(u32)); |
641 | } |
642 | } |
643 | |
644 | static void dcss_scaler_yuv_coef_set(struct dcss_scaler_ch *ch, |
645 | enum buffer_format src_format, |
646 | enum buffer_format dst_format, |
647 | bool use_5_taps, |
648 | int src_xres, int src_yres, int dst_xres, |
649 | int dst_yres) |
650 | { |
651 | int coef[PSC_STORED_PHASES][PSC_NUM_TAPS]; |
652 | bool program_5_taps = use_5_taps || |
653 | (dst_format == BUF_FMT_YUV422 && |
654 | src_format == BUF_FMT_ARGB8888_YUV444); |
655 | |
656 | /* horizontal luma */ |
657 | dcss_scaler_filter_design(src_length: src_xres, dst_length: dst_xres, use_5_taps: false, |
658 | phase0_identity: src_xres == dst_xres, coef, |
659 | nn_interpolation: ch->use_nn_interpolation); |
660 | dcss_scaler_program_7_coef_set(ch, DCSS_SCALER_COEF_HLUM, coef); |
661 | |
662 | /* vertical luma */ |
663 | dcss_scaler_filter_design(src_length: src_yres, dst_length: dst_yres, use_5_taps: program_5_taps, |
664 | phase0_identity: src_yres == dst_yres, coef, |
665 | nn_interpolation: ch->use_nn_interpolation); |
666 | |
667 | if (program_5_taps) |
668 | dcss_scaler_program_5_coef_set(ch, DCSS_SCALER_COEF_VLUM, coef); |
669 | else |
670 | dcss_scaler_program_7_coef_set(ch, DCSS_SCALER_COEF_VLUM, coef); |
671 | |
672 | /* adjust chroma resolution */ |
673 | if (src_format != BUF_FMT_ARGB8888_YUV444) |
674 | src_xres >>= 1; |
675 | if (src_format == BUF_FMT_YUV420) |
676 | src_yres >>= 1; |
677 | if (dst_format != BUF_FMT_ARGB8888_YUV444) |
678 | dst_xres >>= 1; |
679 | if (dst_format == BUF_FMT_YUV420) /* should not happen */ |
680 | dst_yres >>= 1; |
681 | |
682 | /* horizontal chroma */ |
683 | dcss_scaler_filter_design(src_length: src_xres, dst_length: dst_xres, use_5_taps: false, |
684 | phase0_identity: (src_xres == dst_xres) && (ch->c_hstart == 0), |
685 | coef, nn_interpolation: ch->use_nn_interpolation); |
686 | |
687 | dcss_scaler_program_7_coef_set(ch, DCSS_SCALER_COEF_HCHR, coef); |
688 | |
689 | /* vertical chroma */ |
690 | dcss_scaler_filter_design(src_length: src_yres, dst_length: dst_yres, use_5_taps: program_5_taps, |
691 | phase0_identity: (src_yres == dst_yres) && (ch->c_vstart == 0), |
692 | coef, nn_interpolation: ch->use_nn_interpolation); |
693 | if (program_5_taps) |
694 | dcss_scaler_program_5_coef_set(ch, DCSS_SCALER_COEF_VCHR, coef); |
695 | else |
696 | dcss_scaler_program_7_coef_set(ch, DCSS_SCALER_COEF_VCHR, coef); |
697 | } |
698 | |
699 | static void dcss_scaler_rgb_coef_set(struct dcss_scaler_ch *ch, |
700 | int src_xres, int src_yres, int dst_xres, |
701 | int dst_yres) |
702 | { |
703 | int coef[PSC_STORED_PHASES][PSC_NUM_TAPS]; |
704 | |
705 | /* horizontal RGB */ |
706 | dcss_scaler_filter_design(src_length: src_xres, dst_length: dst_xres, use_5_taps: false, |
707 | phase0_identity: src_xres == dst_xres, coef, |
708 | nn_interpolation: ch->use_nn_interpolation); |
709 | dcss_scaler_program_7_coef_set(ch, DCSS_SCALER_COEF_HLUM, coef); |
710 | |
711 | /* vertical RGB */ |
712 | dcss_scaler_filter_design(src_length: src_yres, dst_length: dst_yres, use_5_taps: false, |
713 | phase0_identity: src_yres == dst_yres, coef, |
714 | nn_interpolation: ch->use_nn_interpolation); |
715 | dcss_scaler_program_7_coef_set(ch, DCSS_SCALER_COEF_VLUM, coef); |
716 | } |
717 | |
718 | static void dcss_scaler_set_rgb10_order(struct dcss_scaler_ch *ch, |
719 | const struct drm_format_info *format) |
720 | { |
721 | u32 a2r10g10b10_format; |
722 | |
723 | if (format->is_yuv) |
724 | return; |
725 | |
726 | ch->sdata_ctrl &= ~A2R10G10B10_FORMAT_MASK; |
727 | |
728 | if (format->depth != 30) |
729 | return; |
730 | |
731 | switch (format->format) { |
732 | case DRM_FORMAT_ARGB2101010: |
733 | case DRM_FORMAT_XRGB2101010: |
734 | a2r10g10b10_format = 0; |
735 | break; |
736 | |
737 | case DRM_FORMAT_ABGR2101010: |
738 | case DRM_FORMAT_XBGR2101010: |
739 | a2r10g10b10_format = 5; |
740 | break; |
741 | |
742 | case DRM_FORMAT_RGBA1010102: |
743 | case DRM_FORMAT_RGBX1010102: |
744 | a2r10g10b10_format = 6; |
745 | break; |
746 | |
747 | case DRM_FORMAT_BGRA1010102: |
748 | case DRM_FORMAT_BGRX1010102: |
749 | a2r10g10b10_format = 11; |
750 | break; |
751 | |
752 | default: |
753 | a2r10g10b10_format = 0; |
754 | break; |
755 | } |
756 | |
757 | ch->sdata_ctrl |= a2r10g10b10_format << A2R10G10B10_FORMAT_POS; |
758 | } |
759 | |
760 | void dcss_scaler_set_filter(struct dcss_scaler *scl, int ch_num, |
761 | enum drm_scaling_filter scaling_filter) |
762 | { |
763 | struct dcss_scaler_ch *ch = &scl->ch[ch_num]; |
764 | |
765 | ch->use_nn_interpolation = scaling_filter == DRM_SCALING_FILTER_NEAREST_NEIGHBOR; |
766 | } |
767 | |
768 | void dcss_scaler_setup(struct dcss_scaler *scl, int ch_num, |
769 | const struct drm_format_info *format, |
770 | int src_xres, int src_yres, int dst_xres, int dst_yres, |
771 | u32 vrefresh_hz) |
772 | { |
773 | struct dcss_scaler_ch *ch = &scl->ch[ch_num]; |
774 | unsigned int pixel_depth = 0; |
775 | bool rtr_8line_en = false; |
776 | bool use_5_taps = false; |
777 | enum buffer_format src_format = BUF_FMT_ARGB8888_YUV444; |
778 | enum buffer_format dst_format = BUF_FMT_ARGB8888_YUV444; |
779 | u32 pix_format = format->format; |
780 | |
781 | if (format->is_yuv) { |
782 | dcss_scaler_yuv_enable(ch, en: true); |
783 | |
784 | if (pix_format == DRM_FORMAT_NV12 || |
785 | pix_format == DRM_FORMAT_NV21) { |
786 | rtr_8line_en = true; |
787 | src_format = BUF_FMT_YUV420; |
788 | } else if (pix_format == DRM_FORMAT_UYVY || |
789 | pix_format == DRM_FORMAT_VYUY || |
790 | pix_format == DRM_FORMAT_YUYV || |
791 | pix_format == DRM_FORMAT_YVYU) { |
792 | src_format = BUF_FMT_YUV422; |
793 | } |
794 | |
795 | use_5_taps = !rtr_8line_en; |
796 | } else { |
797 | dcss_scaler_yuv_enable(ch, en: false); |
798 | |
799 | pixel_depth = format->depth; |
800 | } |
801 | |
802 | dcss_scaler_fractions_set(ch, src_xres, src_yres, dst_xres, |
803 | dst_yres, src_format, dst_format, |
804 | src_chroma_loc: PSC_LOC_HORZ_0_VERT_1_OVER_4); |
805 | |
806 | if (format->is_yuv) |
807 | dcss_scaler_yuv_coef_set(ch, src_format, dst_format, |
808 | use_5_taps, src_xres, src_yres, |
809 | dst_xres, dst_yres); |
810 | else |
811 | dcss_scaler_rgb_coef_set(ch, src_xres, src_yres, |
812 | dst_xres, dst_yres); |
813 | |
814 | dcss_scaler_rtr_8lines_enable(ch, en: rtr_8line_en); |
815 | dcss_scaler_bit_depth_set(ch, depth: pixel_depth); |
816 | dcss_scaler_set_rgb10_order(ch, format); |
817 | dcss_scaler_format_set(ch, src_fmt: src_format, dst_fmt: dst_format); |
818 | dcss_scaler_res_set(ch, src_xres, src_yres, dst_xres, dst_yres, |
819 | pix_format, dst_format); |
820 | } |
821 | |
822 | /* This function will be called from interrupt context. */ |
823 | void dcss_scaler_write_sclctrl(struct dcss_scaler *scl) |
824 | { |
825 | int chnum; |
826 | |
827 | dcss_ctxld_assert_locked(ctxld: scl->ctxld); |
828 | |
829 | for (chnum = 0; chnum < 3; chnum++) { |
830 | struct dcss_scaler_ch *ch = &scl->ch[chnum]; |
831 | |
832 | if (ch->scaler_ctrl_chgd) { |
833 | dcss_ctxld_write_irqsafe(ctlxd: scl->ctxld, ctx_id: scl->ctx_id, |
834 | val: ch->scaler_ctrl, |
835 | reg_ofs: ch->base_ofs + |
836 | DCSS_SCALER_CTRL); |
837 | ch->scaler_ctrl_chgd = false; |
838 | } |
839 | } |
840 | } |
841 | |