1 | // SPDX-License-Identifier: GPL-2.0 |
2 | /* |
3 | * Rockchip Video Decoder VP9 backend |
4 | * |
5 | * Copyright (C) 2019 Collabora, Ltd. |
6 | * Boris Brezillon <boris.brezillon@collabora.com> |
7 | * Copyright (C) 2021 Collabora, Ltd. |
8 | * Andrzej Pietrasiewicz <andrzej.p@collabora.com> |
9 | * |
10 | * Copyright (C) 2016 Rockchip Electronics Co., Ltd. |
11 | * Alpha Lin <Alpha.Lin@rock-chips.com> |
12 | */ |
13 | |
14 | /* |
15 | * For following the vp9 spec please start reading this driver |
16 | * code from rkvdec_vp9_run() followed by rkvdec_vp9_done(). |
17 | */ |
18 | |
19 | #include <linux/kernel.h> |
20 | #include <linux/vmalloc.h> |
21 | #include <media/v4l2-mem2mem.h> |
22 | #include <media/v4l2-vp9.h> |
23 | |
24 | #include "rkvdec.h" |
25 | #include "rkvdec-regs.h" |
26 | |
/*
 * Fixed byte sizes used by the VP9 backend.
 *
 * NOTE(review): PROBE_SIZE and COUNT_SIZE are presumably the allocation sizes
 * of the probability and symbol-count auxiliary buffers; their users are not
 * visible next to these definitions - confirm against the allocation code.
 */
#define RKVDEC_VP9_PROBE_SIZE		4864
#define RKVDEC_VP9_COUNT_SIZE		13232
/* Size in bytes of each segment map slot kept in the private table. */
#define RKVDEC_VP9_MAX_SEGMAP_SIZE	73728
30 | |
31 | struct rkvdec_vp9_intra_mode_probs { |
32 | u8 y_mode[105]; |
33 | u8 uv_mode[23]; |
34 | }; |
35 | |
36 | struct rkvdec_vp9_intra_only_frame_probs { |
37 | u8 coef_intra[4][2][128]; |
38 | struct rkvdec_vp9_intra_mode_probs intra_mode[10]; |
39 | }; |
40 | |
41 | struct rkvdec_vp9_inter_frame_probs { |
42 | u8 y_mode[4][9]; |
43 | u8 comp_mode[5]; |
44 | u8 comp_ref[5]; |
45 | u8 single_ref[5][2]; |
46 | u8 inter_mode[7][3]; |
47 | u8 interp_filter[4][2]; |
48 | u8 padding0[11]; |
49 | u8 coef[2][4][2][128]; |
50 | u8 uv_mode_0_2[3][9]; |
51 | u8 padding1[5]; |
52 | u8 uv_mode_3_5[3][9]; |
53 | u8 padding2[5]; |
54 | u8 uv_mode_6_8[3][9]; |
55 | u8 padding3[5]; |
56 | u8 uv_mode_9[9]; |
57 | u8 padding4[7]; |
58 | u8 padding5[16]; |
59 | struct { |
60 | u8 joint[3]; |
61 | u8 sign[2]; |
62 | u8 classes[2][10]; |
63 | u8 class0_bit[2]; |
64 | u8 bits[2][10]; |
65 | u8 class0_fr[2][2][3]; |
66 | u8 fr[2][3]; |
67 | u8 class0_hp[2]; |
68 | u8 hp[2]; |
69 | } mv; |
70 | }; |
71 | |
72 | struct rkvdec_vp9_probs { |
73 | u8 partition[16][3]; |
74 | u8 pred[3]; |
75 | u8 tree[7]; |
76 | u8 skip[3]; |
77 | u8 tx32[2][3]; |
78 | u8 tx16[2][2]; |
79 | u8 tx8[2][1]; |
80 | u8 is_inter[4]; |
81 | /* 128 bit alignment */ |
82 | u8 padding0[3]; |
83 | union { |
84 | struct rkvdec_vp9_inter_frame_probs inter; |
85 | struct rkvdec_vp9_intra_only_frame_probs intra_only; |
86 | }; |
87 | /* 128 bit alignment */ |
88 | u8 padding1[11]; |
89 | }; |
90 | |
91 | /* Data structure describing auxiliary buffer format. */ |
92 | struct rkvdec_vp9_priv_tbl { |
93 | struct rkvdec_vp9_probs probs; |
94 | u8 segmap[2][RKVDEC_VP9_MAX_SEGMAP_SIZE]; |
95 | }; |
96 | |
97 | struct rkvdec_vp9_refs_counts { |
98 | u32 eob[2]; |
99 | u32 coeff[3]; |
100 | }; |
101 | |
102 | struct rkvdec_vp9_inter_frame_symbol_counts { |
103 | u32 partition[16][4]; |
104 | u32 skip[3][2]; |
105 | u32 inter[4][2]; |
106 | u32 tx32p[2][4]; |
107 | u32 tx16p[2][4]; |
108 | u32 tx8p[2][2]; |
109 | u32 y_mode[4][10]; |
110 | u32 uv_mode[10][10]; |
111 | u32 comp[5][2]; |
112 | u32 comp_ref[5][2]; |
113 | u32 single_ref[5][2][2]; |
114 | u32 mv_mode[7][4]; |
115 | u32 filter[4][3]; |
116 | u32 mv_joint[4]; |
117 | u32 sign[2][2]; |
118 | /* add 1 element for align */ |
119 | u32 classes[2][11 + 1]; |
120 | u32 class0[2][2]; |
121 | u32 bits[2][10][2]; |
122 | u32 class0_fp[2][2][4]; |
123 | u32 fp[2][4]; |
124 | u32 class0_hp[2][2]; |
125 | u32 hp[2][2]; |
126 | struct rkvdec_vp9_refs_counts ref_cnt[2][4][2][6][6]; |
127 | }; |
128 | |
129 | struct rkvdec_vp9_intra_frame_symbol_counts { |
130 | u32 partition[4][4][4]; |
131 | u32 skip[3][2]; |
132 | u32 intra[4][2]; |
133 | u32 tx32p[2][4]; |
134 | u32 tx16p[2][4]; |
135 | u32 tx8p[2][2]; |
136 | struct rkvdec_vp9_refs_counts ref_cnt[2][4][2][6][6]; |
137 | }; |
138 | |
139 | struct rkvdec_vp9_run { |
140 | struct rkvdec_run base; |
141 | const struct v4l2_ctrl_vp9_frame *decode_params; |
142 | }; |
143 | |
144 | struct rkvdec_vp9_frame_info { |
145 | u32 valid : 1; |
146 | u32 segmapid : 1; |
147 | u32 frame_context_idx : 2; |
148 | u32 reference_mode : 2; |
149 | u32 tx_mode : 3; |
150 | u32 interpolation_filter : 3; |
151 | u32 flags; |
152 | u64 timestamp; |
153 | struct v4l2_vp9_segmentation seg; |
154 | struct v4l2_vp9_loop_filter lf; |
155 | }; |
156 | |
157 | struct rkvdec_vp9_ctx { |
158 | struct rkvdec_aux_buf priv_tbl; |
159 | struct rkvdec_aux_buf count_tbl; |
160 | struct v4l2_vp9_frame_symbol_counts inter_cnts; |
161 | struct v4l2_vp9_frame_symbol_counts intra_cnts; |
162 | struct v4l2_vp9_frame_context probability_tables; |
163 | struct v4l2_vp9_frame_context frame_context[4]; |
164 | struct rkvdec_vp9_frame_info cur; |
165 | struct rkvdec_vp9_frame_info last; |
166 | }; |
167 | |
168 | static void write_coeff_plane(const u8 coef[6][6][3], u8 *coeff_plane) |
169 | { |
170 | unsigned int idx = 0, byte_count = 0; |
171 | int k, m, n; |
172 | u8 p; |
173 | |
174 | for (k = 0; k < 6; k++) { |
175 | for (m = 0; m < 6; m++) { |
176 | for (n = 0; n < 3; n++) { |
177 | p = coef[k][m][n]; |
178 | coeff_plane[idx++] = p; |
179 | byte_count++; |
180 | if (byte_count == 27) { |
181 | idx += 5; |
182 | byte_count = 0; |
183 | } |
184 | } |
185 | } |
186 | } |
187 | } |
188 | |
189 | static void init_intra_only_probs(struct rkvdec_ctx *ctx, |
190 | const struct rkvdec_vp9_run *run) |
191 | { |
192 | struct rkvdec_vp9_ctx *vp9_ctx = ctx->priv; |
193 | struct rkvdec_vp9_priv_tbl *tbl = vp9_ctx->priv_tbl.cpu; |
194 | struct rkvdec_vp9_intra_only_frame_probs *rkprobs; |
195 | const struct v4l2_vp9_frame_context *probs; |
196 | unsigned int i, j, k; |
197 | |
198 | rkprobs = &tbl->probs.intra_only; |
199 | probs = &vp9_ctx->probability_tables; |
200 | |
201 | /* |
202 | * intra only 149 x 128 bits ,aligned to 152 x 128 bits coeff related |
203 | * prob 64 x 128 bits |
204 | */ |
205 | for (i = 0; i < ARRAY_SIZE(probs->coef); i++) { |
206 | for (j = 0; j < ARRAY_SIZE(probs->coef[0]); j++) |
207 | write_coeff_plane(coef: probs->coef[i][j][0], |
208 | coeff_plane: rkprobs->coef_intra[i][j]); |
209 | } |
210 | |
211 | /* intra mode prob 80 x 128 bits */ |
212 | for (i = 0; i < ARRAY_SIZE(v4l2_vp9_kf_y_mode_prob); i++) { |
213 | unsigned int byte_count = 0; |
214 | int idx = 0; |
215 | |
216 | /* vp9_kf_y_mode_prob */ |
217 | for (j = 0; j < ARRAY_SIZE(v4l2_vp9_kf_y_mode_prob[0]); j++) { |
218 | for (k = 0; k < ARRAY_SIZE(v4l2_vp9_kf_y_mode_prob[0][0]); |
219 | k++) { |
220 | u8 val = v4l2_vp9_kf_y_mode_prob[i][j][k]; |
221 | |
222 | rkprobs->intra_mode[i].y_mode[idx++] = val; |
223 | byte_count++; |
224 | if (byte_count == 27) { |
225 | byte_count = 0; |
226 | idx += 5; |
227 | } |
228 | } |
229 | } |
230 | } |
231 | |
232 | for (i = 0; i < sizeof(v4l2_vp9_kf_uv_mode_prob); ++i) { |
233 | const u8 *ptr = (const u8 *)v4l2_vp9_kf_uv_mode_prob; |
234 | |
235 | rkprobs->intra_mode[i / 23].uv_mode[i % 23] = ptr[i]; |
236 | } |
237 | } |
238 | |
239 | static void init_inter_probs(struct rkvdec_ctx *ctx, |
240 | const struct rkvdec_vp9_run *run) |
241 | { |
242 | struct rkvdec_vp9_ctx *vp9_ctx = ctx->priv; |
243 | struct rkvdec_vp9_priv_tbl *tbl = vp9_ctx->priv_tbl.cpu; |
244 | struct rkvdec_vp9_inter_frame_probs *rkprobs; |
245 | const struct v4l2_vp9_frame_context *probs; |
246 | unsigned int i, j, k; |
247 | |
248 | rkprobs = &tbl->probs.inter; |
249 | probs = &vp9_ctx->probability_tables; |
250 | |
251 | /* |
252 | * inter probs |
253 | * 151 x 128 bits, aligned to 152 x 128 bits |
254 | * inter only |
255 | * intra_y_mode & inter_block info 6 x 128 bits |
256 | */ |
257 | |
258 | memcpy(rkprobs->y_mode, probs->y_mode, sizeof(rkprobs->y_mode)); |
259 | memcpy(rkprobs->comp_mode, probs->comp_mode, |
260 | sizeof(rkprobs->comp_mode)); |
261 | memcpy(rkprobs->comp_ref, probs->comp_ref, |
262 | sizeof(rkprobs->comp_ref)); |
263 | memcpy(rkprobs->single_ref, probs->single_ref, |
264 | sizeof(rkprobs->single_ref)); |
265 | memcpy(rkprobs->inter_mode, probs->inter_mode, |
266 | sizeof(rkprobs->inter_mode)); |
267 | memcpy(rkprobs->interp_filter, probs->interp_filter, |
268 | sizeof(rkprobs->interp_filter)); |
269 | |
270 | /* 128 x 128 bits coeff related */ |
271 | for (i = 0; i < ARRAY_SIZE(probs->coef); i++) { |
272 | for (j = 0; j < ARRAY_SIZE(probs->coef[0]); j++) { |
273 | for (k = 0; k < ARRAY_SIZE(probs->coef[0][0]); k++) |
274 | write_coeff_plane(coef: probs->coef[i][j][k], |
275 | coeff_plane: rkprobs->coef[k][i][j]); |
276 | } |
277 | } |
278 | |
279 | /* intra uv mode 6 x 128 */ |
280 | memcpy(rkprobs->uv_mode_0_2, &probs->uv_mode[0], |
281 | sizeof(rkprobs->uv_mode_0_2)); |
282 | memcpy(rkprobs->uv_mode_3_5, &probs->uv_mode[3], |
283 | sizeof(rkprobs->uv_mode_3_5)); |
284 | memcpy(rkprobs->uv_mode_6_8, &probs->uv_mode[6], |
285 | sizeof(rkprobs->uv_mode_6_8)); |
286 | memcpy(rkprobs->uv_mode_9, &probs->uv_mode[9], |
287 | sizeof(rkprobs->uv_mode_9)); |
288 | |
289 | /* mv related 6 x 128 */ |
290 | memcpy(rkprobs->mv.joint, probs->mv.joint, |
291 | sizeof(rkprobs->mv.joint)); |
292 | memcpy(rkprobs->mv.sign, probs->mv.sign, |
293 | sizeof(rkprobs->mv.sign)); |
294 | memcpy(rkprobs->mv.classes, probs->mv.classes, |
295 | sizeof(rkprobs->mv.classes)); |
296 | memcpy(rkprobs->mv.class0_bit, probs->mv.class0_bit, |
297 | sizeof(rkprobs->mv.class0_bit)); |
298 | memcpy(rkprobs->mv.bits, probs->mv.bits, |
299 | sizeof(rkprobs->mv.bits)); |
300 | memcpy(rkprobs->mv.class0_fr, probs->mv.class0_fr, |
301 | sizeof(rkprobs->mv.class0_fr)); |
302 | memcpy(rkprobs->mv.fr, probs->mv.fr, |
303 | sizeof(rkprobs->mv.fr)); |
304 | memcpy(rkprobs->mv.class0_hp, probs->mv.class0_hp, |
305 | sizeof(rkprobs->mv.class0_hp)); |
306 | memcpy(rkprobs->mv.hp, probs->mv.hp, |
307 | sizeof(rkprobs->mv.hp)); |
308 | } |
309 | |
310 | static void init_probs(struct rkvdec_ctx *ctx, |
311 | const struct rkvdec_vp9_run *run) |
312 | { |
313 | const struct v4l2_ctrl_vp9_frame *dec_params; |
314 | struct rkvdec_vp9_ctx *vp9_ctx = ctx->priv; |
315 | struct rkvdec_vp9_priv_tbl *tbl = vp9_ctx->priv_tbl.cpu; |
316 | struct rkvdec_vp9_probs *rkprobs = &tbl->probs; |
317 | const struct v4l2_vp9_segmentation *seg; |
318 | const struct v4l2_vp9_frame_context *probs; |
319 | bool intra_only; |
320 | |
321 | dec_params = run->decode_params; |
322 | probs = &vp9_ctx->probability_tables; |
323 | seg = &dec_params->seg; |
324 | |
325 | memset(rkprobs, 0, sizeof(*rkprobs)); |
326 | |
327 | intra_only = !!(dec_params->flags & |
328 | (V4L2_VP9_FRAME_FLAG_KEY_FRAME | |
329 | V4L2_VP9_FRAME_FLAG_INTRA_ONLY)); |
330 | |
331 | /* sb info 5 x 128 bit */ |
332 | memcpy(rkprobs->partition, |
333 | intra_only ? v4l2_vp9_kf_partition_probs : probs->partition, |
334 | sizeof(rkprobs->partition)); |
335 | |
336 | memcpy(rkprobs->pred, seg->pred_probs, sizeof(rkprobs->pred)); |
337 | memcpy(rkprobs->tree, seg->tree_probs, sizeof(rkprobs->tree)); |
338 | memcpy(rkprobs->skip, probs->skip, sizeof(rkprobs->skip)); |
339 | memcpy(rkprobs->tx32, probs->tx32, sizeof(rkprobs->tx32)); |
340 | memcpy(rkprobs->tx16, probs->tx16, sizeof(rkprobs->tx16)); |
341 | memcpy(rkprobs->tx8, probs->tx8, sizeof(rkprobs->tx8)); |
342 | memcpy(rkprobs->is_inter, probs->is_inter, sizeof(rkprobs->is_inter)); |
343 | |
344 | if (intra_only) |
345 | init_intra_only_probs(ctx, run); |
346 | else |
347 | init_inter_probs(ctx, run); |
348 | } |
349 | |
350 | struct rkvdec_vp9_ref_reg { |
351 | u32 reg_frm_size; |
352 | u32 reg_hor_stride; |
353 | u32 reg_y_stride; |
354 | u32 reg_yuv_stride; |
355 | u32 reg_ref_base; |
356 | }; |
357 | |
358 | static struct rkvdec_vp9_ref_reg ref_regs[] = { |
359 | { |
360 | .reg_frm_size = RKVDEC_REG_VP9_FRAME_SIZE(0), |
361 | .reg_hor_stride = RKVDEC_VP9_HOR_VIRSTRIDE(0), |
362 | .reg_y_stride = RKVDEC_VP9_LAST_FRAME_YSTRIDE, |
363 | .reg_yuv_stride = RKVDEC_VP9_LAST_FRAME_YUVSTRIDE, |
364 | .reg_ref_base = RKVDEC_REG_VP9_LAST_FRAME_BASE, |
365 | }, |
366 | { |
367 | .reg_frm_size = RKVDEC_REG_VP9_FRAME_SIZE(1), |
368 | .reg_hor_stride = RKVDEC_VP9_HOR_VIRSTRIDE(1), |
369 | .reg_y_stride = RKVDEC_VP9_GOLDEN_FRAME_YSTRIDE, |
370 | .reg_yuv_stride = 0, |
371 | .reg_ref_base = RKVDEC_REG_VP9_GOLDEN_FRAME_BASE, |
372 | }, |
373 | { |
374 | .reg_frm_size = RKVDEC_REG_VP9_FRAME_SIZE(2), |
375 | .reg_hor_stride = RKVDEC_VP9_HOR_VIRSTRIDE(2), |
376 | .reg_y_stride = RKVDEC_VP9_ALTREF_FRAME_YSTRIDE, |
377 | .reg_yuv_stride = 0, |
378 | .reg_ref_base = RKVDEC_REG_VP9_ALTREF_FRAME_BASE, |
379 | } |
380 | }; |
381 | |
382 | static struct rkvdec_decoded_buffer * |
383 | get_ref_buf(struct rkvdec_ctx *ctx, struct vb2_v4l2_buffer *dst, u64 timestamp) |
384 | { |
385 | struct v4l2_m2m_ctx *m2m_ctx = ctx->fh.m2m_ctx; |
386 | struct vb2_queue *cap_q = &m2m_ctx->cap_q_ctx.q; |
387 | struct vb2_buffer *buf; |
388 | |
389 | /* |
390 | * If a ref is unused or invalid, address of current destination |
391 | * buffer is returned. |
392 | */ |
393 | buf = vb2_find_buffer(q: cap_q, timestamp); |
394 | if (!buf) |
395 | buf = &dst->vb2_buf; |
396 | |
397 | return vb2_to_rkvdec_decoded_buf(buf); |
398 | } |
399 | |
400 | static dma_addr_t get_mv_base_addr(struct rkvdec_decoded_buffer *buf) |
401 | { |
402 | unsigned int aligned_pitch, aligned_height, yuv_len; |
403 | |
404 | aligned_height = round_up(buf->vp9.height, 64); |
405 | aligned_pitch = round_up(buf->vp9.width * buf->vp9.bit_depth, 512) / 8; |
406 | yuv_len = (aligned_height * aligned_pitch * 3) / 2; |
407 | |
408 | return vb2_dma_contig_plane_dma_addr(vb: &buf->base.vb.vb2_buf, plane_no: 0) + |
409 | yuv_len; |
410 | } |
411 | |
412 | static void config_ref_registers(struct rkvdec_ctx *ctx, |
413 | const struct rkvdec_vp9_run *run, |
414 | struct rkvdec_decoded_buffer *ref_buf, |
415 | struct rkvdec_vp9_ref_reg *ref_reg) |
416 | { |
417 | unsigned int aligned_pitch, aligned_height, y_len, yuv_len; |
418 | struct rkvdec_dev *rkvdec = ctx->dev; |
419 | |
420 | aligned_height = round_up(ref_buf->vp9.height, 64); |
421 | writel_relaxed(RKVDEC_VP9_FRAMEWIDTH(ref_buf->vp9.width) | |
422 | RKVDEC_VP9_FRAMEHEIGHT(ref_buf->vp9.height), |
423 | rkvdec->regs + ref_reg->reg_frm_size); |
424 | |
425 | writel_relaxed(vb2_dma_contig_plane_dma_addr(&ref_buf->base.vb.vb2_buf, 0), |
426 | rkvdec->regs + ref_reg->reg_ref_base); |
427 | |
428 | if (&ref_buf->base.vb == run->base.bufs.dst) |
429 | return; |
430 | |
431 | aligned_pitch = round_up(ref_buf->vp9.width * ref_buf->vp9.bit_depth, 512) / 8; |
432 | y_len = aligned_height * aligned_pitch; |
433 | yuv_len = (y_len * 3) / 2; |
434 | |
435 | writel_relaxed(RKVDEC_HOR_Y_VIRSTRIDE(aligned_pitch / 16) | |
436 | RKVDEC_HOR_UV_VIRSTRIDE(aligned_pitch / 16), |
437 | rkvdec->regs + ref_reg->reg_hor_stride); |
438 | writel_relaxed(RKVDEC_VP9_REF_YSTRIDE(y_len / 16), |
439 | rkvdec->regs + ref_reg->reg_y_stride); |
440 | |
441 | if (!ref_reg->reg_yuv_stride) |
442 | return; |
443 | |
444 | writel_relaxed(RKVDEC_VP9_REF_YUVSTRIDE(yuv_len / 16), |
445 | rkvdec->regs + ref_reg->reg_yuv_stride); |
446 | } |
447 | |
448 | static void config_seg_registers(struct rkvdec_ctx *ctx, unsigned int segid) |
449 | { |
450 | struct rkvdec_vp9_ctx *vp9_ctx = ctx->priv; |
451 | const struct v4l2_vp9_segmentation *seg; |
452 | struct rkvdec_dev *rkvdec = ctx->dev; |
453 | s16 feature_val; |
454 | int feature_id; |
455 | u32 val = 0; |
456 | |
457 | seg = vp9_ctx->last.valid ? &vp9_ctx->last.seg : &vp9_ctx->cur.seg; |
458 | feature_id = V4L2_VP9_SEG_LVL_ALT_Q; |
459 | if (v4l2_vp9_seg_feat_enabled(feature_enabled: seg->feature_enabled, feature: feature_id, segid)) { |
460 | feature_val = seg->feature_data[segid][feature_id]; |
461 | val |= RKVDEC_SEGID_FRAME_QP_DELTA_EN(1) | |
462 | RKVDEC_SEGID_FRAME_QP_DELTA(feature_val); |
463 | } |
464 | |
465 | feature_id = V4L2_VP9_SEG_LVL_ALT_L; |
466 | if (v4l2_vp9_seg_feat_enabled(feature_enabled: seg->feature_enabled, feature: feature_id, segid)) { |
467 | feature_val = seg->feature_data[segid][feature_id]; |
468 | val |= RKVDEC_SEGID_FRAME_LOOPFILTER_VALUE_EN(1) | |
469 | RKVDEC_SEGID_FRAME_LOOPFILTER_VALUE(feature_val); |
470 | } |
471 | |
472 | feature_id = V4L2_VP9_SEG_LVL_REF_FRAME; |
473 | if (v4l2_vp9_seg_feat_enabled(feature_enabled: seg->feature_enabled, feature: feature_id, segid)) { |
474 | feature_val = seg->feature_data[segid][feature_id]; |
475 | val |= RKVDEC_SEGID_REFERINFO_EN(1) | |
476 | RKVDEC_SEGID_REFERINFO(feature_val); |
477 | } |
478 | |
479 | feature_id = V4L2_VP9_SEG_LVL_SKIP; |
480 | if (v4l2_vp9_seg_feat_enabled(feature_enabled: seg->feature_enabled, feature: feature_id, segid)) |
481 | val |= RKVDEC_SEGID_FRAME_SKIP_EN(1); |
482 | |
483 | if (!segid && |
484 | (seg->flags & V4L2_VP9_SEGMENTATION_FLAG_ABS_OR_DELTA_UPDATE)) |
485 | val |= RKVDEC_SEGID_ABS_DELTA(1); |
486 | |
487 | writel_relaxed(val, rkvdec->regs + RKVDEC_VP9_SEGID_GRP(segid)); |
488 | } |
489 | |
490 | static void update_dec_buf_info(struct rkvdec_decoded_buffer *buf, |
491 | const struct v4l2_ctrl_vp9_frame *dec_params) |
492 | { |
493 | buf->vp9.width = dec_params->frame_width_minus_1 + 1; |
494 | buf->vp9.height = dec_params->frame_height_minus_1 + 1; |
495 | buf->vp9.bit_depth = dec_params->bit_depth; |
496 | } |
497 | |
498 | static void update_ctx_cur_info(struct rkvdec_vp9_ctx *vp9_ctx, |
499 | struct rkvdec_decoded_buffer *buf, |
500 | const struct v4l2_ctrl_vp9_frame *dec_params) |
501 | { |
502 | vp9_ctx->cur.valid = true; |
503 | vp9_ctx->cur.reference_mode = dec_params->reference_mode; |
504 | vp9_ctx->cur.interpolation_filter = dec_params->interpolation_filter; |
505 | vp9_ctx->cur.flags = dec_params->flags; |
506 | vp9_ctx->cur.timestamp = buf->base.vb.vb2_buf.timestamp; |
507 | vp9_ctx->cur.seg = dec_params->seg; |
508 | vp9_ctx->cur.lf = dec_params->lf; |
509 | } |
510 | |
511 | static void update_ctx_last_info(struct rkvdec_vp9_ctx *vp9_ctx) |
512 | { |
513 | vp9_ctx->last = vp9_ctx->cur; |
514 | } |
515 | |
516 | static void config_registers(struct rkvdec_ctx *ctx, |
517 | const struct rkvdec_vp9_run *run) |
518 | { |
519 | unsigned int y_len, uv_len, yuv_len, bit_depth, aligned_height, aligned_pitch, stream_len; |
520 | const struct v4l2_ctrl_vp9_frame *dec_params; |
521 | struct rkvdec_decoded_buffer *ref_bufs[3]; |
522 | struct rkvdec_decoded_buffer *dst, *last, *mv_ref; |
523 | struct rkvdec_vp9_ctx *vp9_ctx = ctx->priv; |
524 | u32 val, last_frame_info = 0; |
525 | const struct v4l2_vp9_segmentation *seg; |
526 | struct rkvdec_dev *rkvdec = ctx->dev; |
527 | dma_addr_t addr; |
528 | bool intra_only; |
529 | unsigned int i; |
530 | |
531 | dec_params = run->decode_params; |
532 | dst = vb2_to_rkvdec_decoded_buf(buf: &run->base.bufs.dst->vb2_buf); |
533 | ref_bufs[0] = get_ref_buf(ctx, dst: &dst->base.vb, timestamp: dec_params->last_frame_ts); |
534 | ref_bufs[1] = get_ref_buf(ctx, dst: &dst->base.vb, timestamp: dec_params->golden_frame_ts); |
535 | ref_bufs[2] = get_ref_buf(ctx, dst: &dst->base.vb, timestamp: dec_params->alt_frame_ts); |
536 | |
537 | if (vp9_ctx->last.valid) |
538 | last = get_ref_buf(ctx, dst: &dst->base.vb, timestamp: vp9_ctx->last.timestamp); |
539 | else |
540 | last = dst; |
541 | |
542 | update_dec_buf_info(buf: dst, dec_params); |
543 | update_ctx_cur_info(vp9_ctx, buf: dst, dec_params); |
544 | seg = &dec_params->seg; |
545 | |
546 | intra_only = !!(dec_params->flags & |
547 | (V4L2_VP9_FRAME_FLAG_KEY_FRAME | |
548 | V4L2_VP9_FRAME_FLAG_INTRA_ONLY)); |
549 | |
550 | writel_relaxed(RKVDEC_MODE(RKVDEC_MODE_VP9), |
551 | rkvdec->regs + RKVDEC_REG_SYSCTRL); |
552 | |
553 | bit_depth = dec_params->bit_depth; |
554 | aligned_height = round_up(ctx->decoded_fmt.fmt.pix_mp.height, 64); |
555 | |
556 | aligned_pitch = round_up(ctx->decoded_fmt.fmt.pix_mp.width * |
557 | bit_depth, |
558 | 512) / 8; |
559 | y_len = aligned_height * aligned_pitch; |
560 | uv_len = y_len / 2; |
561 | yuv_len = y_len + uv_len; |
562 | |
563 | writel_relaxed(RKVDEC_Y_HOR_VIRSTRIDE(aligned_pitch / 16) | |
564 | RKVDEC_UV_HOR_VIRSTRIDE(aligned_pitch / 16), |
565 | rkvdec->regs + RKVDEC_REG_PICPAR); |
566 | writel_relaxed(RKVDEC_Y_VIRSTRIDE(y_len / 16), |
567 | rkvdec->regs + RKVDEC_REG_Y_VIRSTRIDE); |
568 | writel_relaxed(RKVDEC_YUV_VIRSTRIDE(yuv_len / 16), |
569 | rkvdec->regs + RKVDEC_REG_YUV_VIRSTRIDE); |
570 | |
571 | stream_len = vb2_get_plane_payload(vb: &run->base.bufs.src->vb2_buf, plane_no: 0); |
572 | writel_relaxed(RKVDEC_STRM_LEN(stream_len), |
573 | rkvdec->regs + RKVDEC_REG_STRM_LEN); |
574 | |
575 | /* |
576 | * Reset count buffer, because decoder only output intra related syntax |
577 | * counts when decoding intra frame, but update entropy need to update |
578 | * all the probabilities. |
579 | */ |
580 | if (intra_only) |
581 | memset(vp9_ctx->count_tbl.cpu, 0, vp9_ctx->count_tbl.size); |
582 | |
583 | vp9_ctx->cur.segmapid = vp9_ctx->last.segmapid; |
584 | if (!intra_only && |
585 | !(dec_params->flags & V4L2_VP9_FRAME_FLAG_ERROR_RESILIENT) && |
586 | (!(seg->flags & V4L2_VP9_SEGMENTATION_FLAG_ENABLED) || |
587 | (seg->flags & V4L2_VP9_SEGMENTATION_FLAG_UPDATE_MAP))) |
588 | vp9_ctx->cur.segmapid++; |
589 | |
590 | for (i = 0; i < ARRAY_SIZE(ref_bufs); i++) |
591 | config_ref_registers(ctx, run, ref_buf: ref_bufs[i], ref_reg: &ref_regs[i]); |
592 | |
593 | for (i = 0; i < 8; i++) |
594 | config_seg_registers(ctx, segid: i); |
595 | |
596 | writel_relaxed(RKVDEC_VP9_TX_MODE(vp9_ctx->cur.tx_mode) | |
597 | RKVDEC_VP9_FRAME_REF_MODE(dec_params->reference_mode), |
598 | rkvdec->regs + RKVDEC_VP9_CPRHEADER_CONFIG); |
599 | |
600 | if (!intra_only) { |
601 | const struct v4l2_vp9_loop_filter *lf; |
602 | s8 delta; |
603 | |
604 | if (vp9_ctx->last.valid) |
605 | lf = &vp9_ctx->last.lf; |
606 | else |
607 | lf = &vp9_ctx->cur.lf; |
608 | |
609 | val = 0; |
610 | for (i = 0; i < ARRAY_SIZE(lf->ref_deltas); i++) { |
611 | delta = lf->ref_deltas[i]; |
612 | val |= RKVDEC_REF_DELTAS_LASTFRAME(i, delta); |
613 | } |
614 | |
615 | writel_relaxed(val, |
616 | rkvdec->regs + RKVDEC_VP9_REF_DELTAS_LASTFRAME); |
617 | |
618 | for (i = 0; i < ARRAY_SIZE(lf->mode_deltas); i++) { |
619 | delta = lf->mode_deltas[i]; |
620 | last_frame_info |= RKVDEC_MODE_DELTAS_LASTFRAME(i, |
621 | delta); |
622 | } |
623 | } |
624 | |
625 | if (vp9_ctx->last.valid && !intra_only && |
626 | vp9_ctx->last.seg.flags & V4L2_VP9_SEGMENTATION_FLAG_ENABLED) |
627 | last_frame_info |= RKVDEC_SEG_EN_LASTFRAME; |
628 | |
629 | if (vp9_ctx->last.valid && |
630 | vp9_ctx->last.flags & V4L2_VP9_FRAME_FLAG_SHOW_FRAME) |
631 | last_frame_info |= RKVDEC_LAST_SHOW_FRAME; |
632 | |
633 | if (vp9_ctx->last.valid && |
634 | vp9_ctx->last.flags & |
635 | (V4L2_VP9_FRAME_FLAG_KEY_FRAME | V4L2_VP9_FRAME_FLAG_INTRA_ONLY)) |
636 | last_frame_info |= RKVDEC_LAST_INTRA_ONLY; |
637 | |
638 | if (vp9_ctx->last.valid && |
639 | last->vp9.width == dst->vp9.width && |
640 | last->vp9.height == dst->vp9.height) |
641 | last_frame_info |= RKVDEC_LAST_WIDHHEIGHT_EQCUR; |
642 | |
643 | writel_relaxed(last_frame_info, |
644 | rkvdec->regs + RKVDEC_VP9_INFO_LASTFRAME); |
645 | |
646 | writel_relaxed(stream_len - dec_params->compressed_header_size - |
647 | dec_params->uncompressed_header_size, |
648 | rkvdec->regs + RKVDEC_VP9_LASTTILE_SIZE); |
649 | |
650 | for (i = 0; !intra_only && i < ARRAY_SIZE(ref_bufs); i++) { |
651 | unsigned int refw = ref_bufs[i]->vp9.width; |
652 | unsigned int refh = ref_bufs[i]->vp9.height; |
653 | u32 hscale, vscale; |
654 | |
655 | hscale = (refw << 14) / dst->vp9.width; |
656 | vscale = (refh << 14) / dst->vp9.height; |
657 | writel_relaxed(RKVDEC_VP9_REF_HOR_SCALE(hscale) | |
658 | RKVDEC_VP9_REF_VER_SCALE(vscale), |
659 | rkvdec->regs + RKVDEC_VP9_REF_SCALE(i)); |
660 | } |
661 | |
662 | addr = vb2_dma_contig_plane_dma_addr(vb: &dst->base.vb.vb2_buf, plane_no: 0); |
663 | writel_relaxed(addr, rkvdec->regs + RKVDEC_REG_DECOUT_BASE); |
664 | addr = vb2_dma_contig_plane_dma_addr(vb: &run->base.bufs.src->vb2_buf, plane_no: 0); |
665 | writel_relaxed(addr, rkvdec->regs + RKVDEC_REG_STRM_RLC_BASE); |
666 | writel_relaxed(vp9_ctx->priv_tbl.dma + |
667 | offsetof(struct rkvdec_vp9_priv_tbl, probs), |
668 | rkvdec->regs + RKVDEC_REG_CABACTBL_PROB_BASE); |
669 | writel_relaxed(vp9_ctx->count_tbl.dma, |
670 | rkvdec->regs + RKVDEC_REG_VP9COUNT_BASE); |
671 | |
672 | writel_relaxed(vp9_ctx->priv_tbl.dma + |
673 | offsetof(struct rkvdec_vp9_priv_tbl, segmap) + |
674 | (RKVDEC_VP9_MAX_SEGMAP_SIZE * vp9_ctx->cur.segmapid), |
675 | rkvdec->regs + RKVDEC_REG_VP9_SEGIDCUR_BASE); |
676 | writel_relaxed(vp9_ctx->priv_tbl.dma + |
677 | offsetof(struct rkvdec_vp9_priv_tbl, segmap) + |
678 | (RKVDEC_VP9_MAX_SEGMAP_SIZE * (!vp9_ctx->cur.segmapid)), |
679 | rkvdec->regs + RKVDEC_REG_VP9_SEGIDLAST_BASE); |
680 | |
681 | if (!intra_only && |
682 | !(dec_params->flags & V4L2_VP9_FRAME_FLAG_ERROR_RESILIENT) && |
683 | vp9_ctx->last.valid) |
684 | mv_ref = last; |
685 | else |
686 | mv_ref = dst; |
687 | |
688 | writel_relaxed(get_mv_base_addr(mv_ref), |
689 | rkvdec->regs + RKVDEC_VP9_REF_COLMV_BASE); |
690 | |
691 | writel_relaxed(ctx->decoded_fmt.fmt.pix_mp.width | |
692 | (ctx->decoded_fmt.fmt.pix_mp.height << 16), |
693 | rkvdec->regs + RKVDEC_REG_PERFORMANCE_CYCLE); |
694 | } |
695 | |
696 | static int validate_dec_params(struct rkvdec_ctx *ctx, |
697 | const struct v4l2_ctrl_vp9_frame *dec_params) |
698 | { |
699 | unsigned int aligned_width, aligned_height; |
700 | |
701 | /* We only support profile 0. */ |
702 | if (dec_params->profile != 0) { |
703 | dev_err(ctx->dev->dev, "unsupported profile %d\n" , |
704 | dec_params->profile); |
705 | return -EINVAL; |
706 | } |
707 | |
708 | aligned_width = round_up(dec_params->frame_width_minus_1 + 1, 64); |
709 | aligned_height = round_up(dec_params->frame_height_minus_1 + 1, 64); |
710 | |
711 | /* |
712 | * Userspace should update the capture/decoded format when the |
713 | * resolution changes. |
714 | */ |
715 | if (aligned_width != ctx->decoded_fmt.fmt.pix_mp.width || |
716 | aligned_height != ctx->decoded_fmt.fmt.pix_mp.height) { |
717 | dev_err(ctx->dev->dev, |
718 | "unexpected bitstream resolution %dx%d\n" , |
719 | dec_params->frame_width_minus_1 + 1, |
720 | dec_params->frame_height_minus_1 + 1); |
721 | return -EINVAL; |
722 | } |
723 | |
724 | return 0; |
725 | } |
726 | |
727 | static int rkvdec_vp9_run_preamble(struct rkvdec_ctx *ctx, |
728 | struct rkvdec_vp9_run *run) |
729 | { |
730 | const struct v4l2_ctrl_vp9_frame *dec_params; |
731 | const struct v4l2_ctrl_vp9_compressed_hdr *prob_updates; |
732 | struct rkvdec_vp9_ctx *vp9_ctx = ctx->priv; |
733 | struct v4l2_ctrl *ctrl; |
734 | unsigned int fctx_idx; |
735 | int ret; |
736 | |
737 | /* v4l2-specific stuff */ |
738 | rkvdec_run_preamble(ctx, run: &run->base); |
739 | |
740 | ctrl = v4l2_ctrl_find(hdl: &ctx->ctrl_hdl, |
741 | V4L2_CID_STATELESS_VP9_FRAME); |
742 | if (WARN_ON(!ctrl)) |
743 | return -EINVAL; |
744 | dec_params = ctrl->p_cur.p; |
745 | |
746 | ret = validate_dec_params(ctx, dec_params); |
747 | if (ret) |
748 | return ret; |
749 | |
750 | run->decode_params = dec_params; |
751 | |
752 | ctrl = v4l2_ctrl_find(hdl: &ctx->ctrl_hdl, V4L2_CID_STATELESS_VP9_COMPRESSED_HDR); |
753 | if (WARN_ON(!ctrl)) |
754 | return -EINVAL; |
755 | prob_updates = ctrl->p_cur.p; |
756 | vp9_ctx->cur.tx_mode = prob_updates->tx_mode; |
757 | |
758 | /* |
759 | * vp9 stuff |
760 | * |
761 | * by this point the userspace has done all parts of 6.2 uncompressed_header() |
762 | * except this fragment: |
763 | * if ( FrameIsIntra || error_resilient_mode ) { |
764 | * setup_past_independence ( ) |
765 | * if ( frame_type == KEY_FRAME || error_resilient_mode == 1 || |
766 | * reset_frame_context == 3 ) { |
767 | * for ( i = 0; i < 4; i ++ ) { |
768 | * save_probs( i ) |
769 | * } |
770 | * } else if ( reset_frame_context == 2 ) { |
771 | * save_probs( frame_context_idx ) |
772 | * } |
773 | * frame_context_idx = 0 |
774 | * } |
775 | */ |
776 | fctx_idx = v4l2_vp9_reset_frame_ctx(dec_params, frame_context: vp9_ctx->frame_context); |
777 | vp9_ctx->cur.frame_context_idx = fctx_idx; |
778 | |
779 | /* 6.1 frame(sz): load_probs() and load_probs2() */ |
780 | vp9_ctx->probability_tables = vp9_ctx->frame_context[fctx_idx]; |
781 | |
782 | /* |
783 | * The userspace has also performed 6.3 compressed_header(), but handling the |
784 | * probs in a special way. All probs which need updating, except MV-related, |
785 | * have been read from the bitstream and translated through inv_map_table[], |
786 | * but no 6.3.6 inv_recenter_nonneg(v, m) has been performed. The values passed |
787 | * by userspace are either translated values (there are no 0 values in |
788 | * inv_map_table[]), or zero to indicate no update. All MV-related probs which need |
789 | * updating have been read from the bitstream and (mv_prob << 1) | 1 has been |
790 | * performed. The values passed by userspace are either new values |
791 | * to replace old ones (the above mentioned shift and bitwise or never result in |
792 | * a zero) or zero to indicate no update. |
793 | * fw_update_probs() performs actual probs updates or leaves probs as-is |
794 | * for values for which a zero was passed from userspace. |
795 | */ |
796 | v4l2_vp9_fw_update_probs(probs: &vp9_ctx->probability_tables, deltas: prob_updates, dec_params); |
797 | |
798 | return 0; |
799 | } |
800 | |
801 | static int rkvdec_vp9_run(struct rkvdec_ctx *ctx) |
802 | { |
803 | struct rkvdec_dev *rkvdec = ctx->dev; |
804 | struct rkvdec_vp9_run run = { }; |
805 | int ret; |
806 | |
807 | ret = rkvdec_vp9_run_preamble(ctx, run: &run); |
808 | if (ret) { |
809 | rkvdec_run_postamble(ctx, run: &run.base); |
810 | return ret; |
811 | } |
812 | |
813 | /* Prepare probs. */ |
814 | init_probs(ctx, run: &run); |
815 | |
816 | /* Configure hardware registers. */ |
817 | config_registers(ctx, run: &run); |
818 | |
819 | rkvdec_run_postamble(ctx, run: &run.base); |
820 | |
821 | schedule_delayed_work(dwork: &rkvdec->watchdog_work, delay: msecs_to_jiffies(m: 2000)); |
822 | |
823 | writel(val: 1, addr: rkvdec->regs + RKVDEC_REG_PREF_LUMA_CACHE_COMMAND); |
824 | writel(val: 1, addr: rkvdec->regs + RKVDEC_REG_PREF_CHR_CACHE_COMMAND); |
825 | |
826 | writel(val: 0xe, addr: rkvdec->regs + RKVDEC_REG_STRMD_ERR_EN); |
827 | /* Start decoding! */ |
828 | writel(RKVDEC_INTERRUPT_DEC_E | RKVDEC_CONFIG_DEC_CLK_GATE_E | |
829 | RKVDEC_TIMEOUT_E | RKVDEC_BUF_EMPTY_E, |
830 | addr: rkvdec->regs + RKVDEC_REG_INTERRUPT); |
831 | |
832 | return 0; |
833 | } |
834 | |
/*
 * Copy the tx8, tx16, tx32 and skip arrays from *(p2) to *(p1).
 *
 * Both arguments are pointers to structures having members with these
 * names; the number of bytes copied is the size of the destination member.
 * Each argument is evaluated several times, so pass expressions without
 * side effects.
 */
#define copy_tx_and_skip(p1, p2)					\
do {									\
	memcpy((p1)->tx8, (p2)->tx8, sizeof((p1)->tx8));		\
	memcpy((p1)->tx16, (p2)->tx16, sizeof((p1)->tx16));		\
	memcpy((p1)->tx32, (p2)->tx32, sizeof((p1)->tx32));		\
	memcpy((p1)->skip, (p2)->skip, sizeof((p1)->skip));		\
} while (0)
842 | |
843 | static void rkvdec_vp9_done(struct rkvdec_ctx *ctx, |
844 | struct vb2_v4l2_buffer *src_buf, |
845 | struct vb2_v4l2_buffer *dst_buf, |
846 | enum vb2_buffer_state result) |
847 | { |
848 | struct rkvdec_vp9_ctx *vp9_ctx = ctx->priv; |
849 | unsigned int fctx_idx; |
850 | |
851 | /* v4l2-specific stuff */ |
852 | if (result == VB2_BUF_STATE_ERROR) |
853 | goto out_update_last; |
854 | |
855 | /* |
856 | * vp9 stuff |
857 | * |
858 | * 6.1.2 refresh_probs() |
859 | * |
860 | * In the spec a complementary condition goes last in 6.1.2 refresh_probs(), |
861 | * but it makes no sense to perform all the activities from the first "if" |
862 | * there if we actually are not refreshing the frame context. On top of that, |
863 | * because of 6.2 uncompressed_header() whenever error_resilient_mode == 1, |
864 | * refresh_frame_context == 0. Consequently, if we don't jump to out_update_last |
865 | * it means error_resilient_mode must be 0. |
866 | */ |
867 | if (!(vp9_ctx->cur.flags & V4L2_VP9_FRAME_FLAG_REFRESH_FRAME_CTX)) |
868 | goto out_update_last; |
869 | |
870 | fctx_idx = vp9_ctx->cur.frame_context_idx; |
871 | |
872 | if (!(vp9_ctx->cur.flags & V4L2_VP9_FRAME_FLAG_PARALLEL_DEC_MODE)) { |
873 | /* error_resilient_mode == 0 && frame_parallel_decoding_mode == 0 */ |
874 | struct v4l2_vp9_frame_context *probs = &vp9_ctx->probability_tables; |
875 | bool frame_is_intra = vp9_ctx->cur.flags & |
876 | (V4L2_VP9_FRAME_FLAG_KEY_FRAME | V4L2_VP9_FRAME_FLAG_INTRA_ONLY); |
877 | struct tx_and_skip { |
878 | u8 tx8[2][1]; |
879 | u8 tx16[2][2]; |
880 | u8 tx32[2][3]; |
881 | u8 skip[3]; |
882 | } _tx_skip, *tx_skip = &_tx_skip; |
883 | struct v4l2_vp9_frame_symbol_counts *counts; |
884 | |
885 | /* buffer the forward-updated TX and skip probs */ |
886 | if (frame_is_intra) |
887 | copy_tx_and_skip(tx_skip, probs); |
888 | |
889 | /* 6.1.2 refresh_probs(): load_probs() and load_probs2() */ |
890 | *probs = vp9_ctx->frame_context[fctx_idx]; |
891 | |
892 | /* if FrameIsIntra then undo the effect of load_probs2() */ |
893 | if (frame_is_intra) |
894 | copy_tx_and_skip(probs, tx_skip); |
895 | |
896 | counts = frame_is_intra ? &vp9_ctx->intra_cnts : &vp9_ctx->inter_cnts; |
897 | v4l2_vp9_adapt_coef_probs(probs, counts, |
898 | use_128: !vp9_ctx->last.valid || |
899 | vp9_ctx->last.flags & V4L2_VP9_FRAME_FLAG_KEY_FRAME, |
900 | frame_is_intra); |
901 | if (!frame_is_intra) { |
902 | const struct rkvdec_vp9_inter_frame_symbol_counts *inter_cnts; |
903 | u32 classes[2][11]; |
904 | int i; |
905 | |
906 | inter_cnts = vp9_ctx->count_tbl.cpu; |
907 | for (i = 0; i < ARRAY_SIZE(classes); ++i) |
908 | memcpy(classes[i], inter_cnts->classes[i], sizeof(classes[0])); |
909 | counts->classes = &classes; |
910 | |
911 | /* load_probs2() already done */ |
912 | v4l2_vp9_adapt_noncoef_probs(probs: &vp9_ctx->probability_tables, counts, |
913 | reference_mode: vp9_ctx->cur.reference_mode, |
914 | interpolation_filter: vp9_ctx->cur.interpolation_filter, |
915 | tx_mode: vp9_ctx->cur.tx_mode, flags: vp9_ctx->cur.flags); |
916 | } |
917 | } |
918 | |
919 | /* 6.1.2 refresh_probs(): save_probs(fctx_idx) */ |
920 | vp9_ctx->frame_context[fctx_idx] = vp9_ctx->probability_tables; |
921 | |
922 | out_update_last: |
923 | update_ctx_last_info(vp9_ctx); |
924 | } |
925 | |
926 | static void rkvdec_init_v4l2_vp9_count_tbl(struct rkvdec_ctx *ctx) |
927 | { |
928 | struct rkvdec_vp9_ctx *vp9_ctx = ctx->priv; |
929 | struct rkvdec_vp9_intra_frame_symbol_counts *intra_cnts = vp9_ctx->count_tbl.cpu; |
930 | struct rkvdec_vp9_inter_frame_symbol_counts *inter_cnts = vp9_ctx->count_tbl.cpu; |
931 | int i, j, k, l, m; |
932 | |
933 | vp9_ctx->inter_cnts.partition = &inter_cnts->partition; |
934 | vp9_ctx->inter_cnts.skip = &inter_cnts->skip; |
935 | vp9_ctx->inter_cnts.intra_inter = &inter_cnts->inter; |
936 | vp9_ctx->inter_cnts.tx32p = &inter_cnts->tx32p; |
937 | vp9_ctx->inter_cnts.tx16p = &inter_cnts->tx16p; |
938 | vp9_ctx->inter_cnts.tx8p = &inter_cnts->tx8p; |
939 | |
940 | vp9_ctx->intra_cnts.partition = (u32 (*)[16][4])(&intra_cnts->partition); |
941 | vp9_ctx->intra_cnts.skip = &intra_cnts->skip; |
942 | vp9_ctx->intra_cnts.intra_inter = &intra_cnts->intra; |
943 | vp9_ctx->intra_cnts.tx32p = &intra_cnts->tx32p; |
944 | vp9_ctx->intra_cnts.tx16p = &intra_cnts->tx16p; |
945 | vp9_ctx->intra_cnts.tx8p = &intra_cnts->tx8p; |
946 | |
947 | vp9_ctx->inter_cnts.y_mode = &inter_cnts->y_mode; |
948 | vp9_ctx->inter_cnts.uv_mode = &inter_cnts->uv_mode; |
949 | vp9_ctx->inter_cnts.comp = &inter_cnts->comp; |
950 | vp9_ctx->inter_cnts.comp_ref = &inter_cnts->comp_ref; |
951 | vp9_ctx->inter_cnts.single_ref = &inter_cnts->single_ref; |
952 | vp9_ctx->inter_cnts.mv_mode = &inter_cnts->mv_mode; |
953 | vp9_ctx->inter_cnts.filter = &inter_cnts->filter; |
954 | vp9_ctx->inter_cnts.mv_joint = &inter_cnts->mv_joint; |
955 | vp9_ctx->inter_cnts.sign = &inter_cnts->sign; |
956 | /* |
957 | * rk hardware actually uses "u32 classes[2][11 + 1];" |
958 | * instead of "u32 classes[2][11];", so this must be explicitly |
959 | * copied into vp9_ctx->classes when passing the data to the |
960 | * vp9 library function |
961 | */ |
962 | vp9_ctx->inter_cnts.class0 = &inter_cnts->class0; |
963 | vp9_ctx->inter_cnts.bits = &inter_cnts->bits; |
964 | vp9_ctx->inter_cnts.class0_fp = &inter_cnts->class0_fp; |
965 | vp9_ctx->inter_cnts.fp = &inter_cnts->fp; |
966 | vp9_ctx->inter_cnts.class0_hp = &inter_cnts->class0_hp; |
967 | vp9_ctx->inter_cnts.hp = &inter_cnts->hp; |
968 | |
969 | #define INNERMOST_LOOP \ |
970 | do { \ |
971 | for (m = 0; m < ARRAY_SIZE(vp9_ctx->inter_cnts.coeff[0][0][0][0]); ++m) {\ |
972 | vp9_ctx->inter_cnts.coeff[i][j][k][l][m] = \ |
973 | &inter_cnts->ref_cnt[k][i][j][l][m].coeff; \ |
974 | vp9_ctx->inter_cnts.eob[i][j][k][l][m][0] = \ |
975 | &inter_cnts->ref_cnt[k][i][j][l][m].eob[0]; \ |
976 | vp9_ctx->inter_cnts.eob[i][j][k][l][m][1] = \ |
977 | &inter_cnts->ref_cnt[k][i][j][l][m].eob[1]; \ |
978 | \ |
979 | vp9_ctx->intra_cnts.coeff[i][j][k][l][m] = \ |
980 | &intra_cnts->ref_cnt[k][i][j][l][m].coeff; \ |
981 | vp9_ctx->intra_cnts.eob[i][j][k][l][m][0] = \ |
982 | &intra_cnts->ref_cnt[k][i][j][l][m].eob[0]; \ |
983 | vp9_ctx->intra_cnts.eob[i][j][k][l][m][1] = \ |
984 | &intra_cnts->ref_cnt[k][i][j][l][m].eob[1]; \ |
985 | } \ |
986 | } while (0) |
987 | |
988 | for (i = 0; i < ARRAY_SIZE(vp9_ctx->inter_cnts.coeff); ++i) |
989 | for (j = 0; j < ARRAY_SIZE(vp9_ctx->inter_cnts.coeff[0]); ++j) |
990 | for (k = 0; k < ARRAY_SIZE(vp9_ctx->inter_cnts.coeff[0][0]); ++k) |
991 | for (l = 0; l < ARRAY_SIZE(vp9_ctx->inter_cnts.coeff[0][0][0]); ++l) |
992 | INNERMOST_LOOP; |
993 | #undef INNERMOST_LOOP |
994 | } |
995 | |
996 | static int rkvdec_vp9_start(struct rkvdec_ctx *ctx) |
997 | { |
998 | struct rkvdec_dev *rkvdec = ctx->dev; |
999 | struct rkvdec_vp9_priv_tbl *priv_tbl; |
1000 | struct rkvdec_vp9_ctx *vp9_ctx; |
1001 | unsigned char *count_tbl; |
1002 | int ret; |
1003 | |
1004 | vp9_ctx = kzalloc(size: sizeof(*vp9_ctx), GFP_KERNEL); |
1005 | if (!vp9_ctx) |
1006 | return -ENOMEM; |
1007 | |
1008 | ctx->priv = vp9_ctx; |
1009 | |
1010 | BUILD_BUG_ON(sizeof(priv_tbl->probs) % 16); /* ensure probs size is 128-bit aligned */ |
1011 | priv_tbl = dma_alloc_coherent(dev: rkvdec->dev, size: sizeof(*priv_tbl), |
1012 | dma_handle: &vp9_ctx->priv_tbl.dma, GFP_KERNEL); |
1013 | if (!priv_tbl) { |
1014 | ret = -ENOMEM; |
1015 | goto err_free_ctx; |
1016 | } |
1017 | |
1018 | vp9_ctx->priv_tbl.size = sizeof(*priv_tbl); |
1019 | vp9_ctx->priv_tbl.cpu = priv_tbl; |
1020 | |
1021 | count_tbl = dma_alloc_coherent(dev: rkvdec->dev, RKVDEC_VP9_COUNT_SIZE, |
1022 | dma_handle: &vp9_ctx->count_tbl.dma, GFP_KERNEL); |
1023 | if (!count_tbl) { |
1024 | ret = -ENOMEM; |
1025 | goto err_free_priv_tbl; |
1026 | } |
1027 | |
1028 | vp9_ctx->count_tbl.size = RKVDEC_VP9_COUNT_SIZE; |
1029 | vp9_ctx->count_tbl.cpu = count_tbl; |
1030 | rkvdec_init_v4l2_vp9_count_tbl(ctx); |
1031 | |
1032 | return 0; |
1033 | |
1034 | err_free_priv_tbl: |
1035 | dma_free_coherent(dev: rkvdec->dev, size: vp9_ctx->priv_tbl.size, |
1036 | cpu_addr: vp9_ctx->priv_tbl.cpu, dma_handle: vp9_ctx->priv_tbl.dma); |
1037 | |
1038 | err_free_ctx: |
1039 | kfree(objp: vp9_ctx); |
1040 | return ret; |
1041 | } |
1042 | |
1043 | static void rkvdec_vp9_stop(struct rkvdec_ctx *ctx) |
1044 | { |
1045 | struct rkvdec_vp9_ctx *vp9_ctx = ctx->priv; |
1046 | struct rkvdec_dev *rkvdec = ctx->dev; |
1047 | |
1048 | dma_free_coherent(dev: rkvdec->dev, size: vp9_ctx->count_tbl.size, |
1049 | cpu_addr: vp9_ctx->count_tbl.cpu, dma_handle: vp9_ctx->count_tbl.dma); |
1050 | dma_free_coherent(dev: rkvdec->dev, size: vp9_ctx->priv_tbl.size, |
1051 | cpu_addr: vp9_ctx->priv_tbl.cpu, dma_handle: vp9_ctx->priv_tbl.dma); |
1052 | kfree(objp: vp9_ctx); |
1053 | } |
1054 | |
1055 | static int rkvdec_vp9_adjust_fmt(struct rkvdec_ctx *ctx, |
1056 | struct v4l2_format *f) |
1057 | { |
1058 | struct v4l2_pix_format_mplane *fmt = &f->fmt.pix_mp; |
1059 | |
1060 | fmt->num_planes = 1; |
1061 | if (!fmt->plane_fmt[0].sizeimage) |
1062 | fmt->plane_fmt[0].sizeimage = fmt->width * fmt->height * 2; |
1063 | return 0; |
1064 | } |
1065 | |
1066 | const struct rkvdec_coded_fmt_ops rkvdec_vp9_fmt_ops = { |
1067 | .adjust_fmt = rkvdec_vp9_adjust_fmt, |
1068 | .start = rkvdec_vp9_start, |
1069 | .stop = rkvdec_vp9_stop, |
1070 | .run = rkvdec_vp9_run, |
1071 | .done = rkvdec_vp9_done, |
1072 | }; |
1073 | |