1 | // SPDX-License-Identifier: GPL-2.0 |
2 | /* |
3 | * Hantro VP9 codec driver |
4 | * |
5 | * Copyright (C) 2021 Collabora Ltd. |
6 | */ |
7 | |
8 | #include <linux/types.h> |
9 | #include <media/v4l2-mem2mem.h> |
10 | |
11 | #include "hantro.h" |
12 | #include "hantro_hw.h" |
13 | #include "hantro_vp9.h" |
14 | |
/* 2 raised to the power of n, for small non-negative n. */
#define POW2(n) (1 << (n))

/* Number of tile columns used when sizing the tile filter / BSD control areas. */
#define MAX_LOG2_TILE_COLUMNS 6
#define MAX_NUM_TILE_COLS POW2(MAX_LOG2_TILE_COLUMNS)

/* Tile grid dimensions used when sizing the tile info area. */
#define MAX_TILE_COLS 20
#define MAX_TILE_ROWS 22
21 | |
22 | static size_t hantro_vp9_tile_filter_size(unsigned int height) |
23 | { |
24 | u32 h, height32, size; |
25 | |
26 | h = roundup(height, 8); |
27 | |
28 | height32 = roundup(h, 64); |
29 | size = 24 * height32 * (MAX_NUM_TILE_COLS - 1); /* luma: 8, chroma: 8 + 8 */ |
30 | |
31 | return size; |
32 | } |
33 | |
34 | static size_t hantro_vp9_bsd_control_size(unsigned int height) |
35 | { |
36 | u32 h, height32; |
37 | |
38 | h = roundup(height, 8); |
39 | height32 = roundup(h, 64); |
40 | |
41 | return 16 * (height32 / 4) * (MAX_NUM_TILE_COLS - 1); |
42 | } |
43 | |
44 | static size_t hantro_vp9_segment_map_size(unsigned int width, unsigned int height) |
45 | { |
46 | u32 w, h; |
47 | int num_ctbs; |
48 | |
49 | w = roundup(width, 8); |
50 | h = roundup(height, 8); |
51 | num_ctbs = ((w + 63) / 64) * ((h + 63) / 64); |
52 | |
53 | return num_ctbs * 32; |
54 | } |
55 | |
56 | static inline size_t hantro_vp9_prob_tab_size(void) |
57 | { |
58 | return roundup(sizeof(struct hantro_g2_all_probs), 16); |
59 | } |
60 | |
61 | static inline size_t hantro_vp9_count_tab_size(void) |
62 | { |
63 | return roundup(sizeof(struct symbol_counts), 16); |
64 | } |
65 | |
66 | static inline size_t hantro_vp9_tile_info_size(void) |
67 | { |
68 | return roundup((MAX_TILE_COLS * MAX_TILE_ROWS * 4 * sizeof(u16) + 15 + 16) & ~0xf, 16); |
69 | } |
70 | |
71 | static void *get_coeffs_arr(struct symbol_counts *cnts, int i, int j, int k, int l, int m) |
72 | { |
73 | if (i == 0) |
74 | return &cnts->count_coeffs[j][k][l][m]; |
75 | |
76 | if (i == 1) |
77 | return &cnts->count_coeffs8x8[j][k][l][m]; |
78 | |
79 | if (i == 2) |
80 | return &cnts->count_coeffs16x16[j][k][l][m]; |
81 | |
82 | if (i == 3) |
83 | return &cnts->count_coeffs32x32[j][k][l][m]; |
84 | |
85 | return NULL; |
86 | } |
87 | |
88 | static void *get_eobs1(struct symbol_counts *cnts, int i, int j, int k, int l, int m) |
89 | { |
90 | if (i == 0) |
91 | return &cnts->count_coeffs[j][k][l][m][3]; |
92 | |
93 | if (i == 1) |
94 | return &cnts->count_coeffs8x8[j][k][l][m][3]; |
95 | |
96 | if (i == 2) |
97 | return &cnts->count_coeffs16x16[j][k][l][m][3]; |
98 | |
99 | if (i == 3) |
100 | return &cnts->count_coeffs32x32[j][k][l][m][3]; |
101 | |
102 | return NULL; |
103 | } |
104 | |
/*
 * Innermost step of the count table setup: for the current (i, j, k, l)
 * walk every m and store the coeff pointer and the two eob pointers.
 *
 * Expands in place, so i, j, k, l, m, cnts and vp9_ctx must all be
 * declared in the enclosing scope.
 */
#define INNER_LOOP								\
	do {									\
		for (m = 0; m < ARRAY_SIZE(vp9_ctx->cnts.coeff[i][0][0][0]); m++) { \
			vp9_ctx->cnts.eob[i][j][k][l][m][0] =			\
				&cnts->count_eobs[i][j][k][l][m];		\
			vp9_ctx->cnts.eob[i][j][k][l][m][1] =			\
				get_eobs1(cnts, i, j, k, l, m);			\
			vp9_ctx->cnts.coeff[i][j][k][l][m] =			\
				get_coeffs_arr(cnts, i, j, k, l, m);		\
		}								\
	} while (0)
116 | |
117 | static void init_v4l2_vp9_count_tbl(struct hantro_ctx *ctx) |
118 | { |
119 | struct hantro_vp9_dec_hw_ctx *vp9_ctx = &ctx->vp9_dec; |
120 | struct symbol_counts *cnts = vp9_ctx->misc.cpu + vp9_ctx->ctx_counters_offset; |
121 | int i, j, k, l, m; |
122 | |
123 | vp9_ctx->cnts.partition = &cnts->partition_counts; |
124 | vp9_ctx->cnts.skip = &cnts->mbskip_count; |
125 | vp9_ctx->cnts.intra_inter = &cnts->intra_inter_count; |
126 | vp9_ctx->cnts.tx32p = &cnts->tx32x32_count; |
127 | /* |
128 | * g2 hardware uses tx16x16_count[2][3], while the api |
129 | * expects tx16p[2][4], so this must be explicitly copied |
130 | * into vp9_ctx->cnts.tx16p when passing the data to the |
131 | * vp9 library function |
132 | */ |
133 | vp9_ctx->cnts.tx8p = &cnts->tx8x8_count; |
134 | |
135 | vp9_ctx->cnts.y_mode = &cnts->sb_ymode_counts; |
136 | vp9_ctx->cnts.uv_mode = &cnts->uv_mode_counts; |
137 | vp9_ctx->cnts.comp = &cnts->comp_inter_count; |
138 | vp9_ctx->cnts.comp_ref = &cnts->comp_ref_count; |
139 | vp9_ctx->cnts.single_ref = &cnts->single_ref_count; |
140 | vp9_ctx->cnts.filter = &cnts->switchable_interp_counts; |
141 | vp9_ctx->cnts.mv_joint = &cnts->mv_counts.joints; |
142 | vp9_ctx->cnts.sign = &cnts->mv_counts.sign; |
143 | vp9_ctx->cnts.classes = &cnts->mv_counts.classes; |
144 | vp9_ctx->cnts.class0 = &cnts->mv_counts.class0; |
145 | vp9_ctx->cnts.bits = &cnts->mv_counts.bits; |
146 | vp9_ctx->cnts.class0_fp = &cnts->mv_counts.class0_fp; |
147 | vp9_ctx->cnts.fp = &cnts->mv_counts.fp; |
148 | vp9_ctx->cnts.class0_hp = &cnts->mv_counts.class0_hp; |
149 | vp9_ctx->cnts.hp = &cnts->mv_counts.hp; |
150 | |
151 | for (i = 0; i < ARRAY_SIZE(vp9_ctx->cnts.coeff); ++i) |
152 | for (j = 0; j < ARRAY_SIZE(vp9_ctx->cnts.coeff[i]); ++j) |
153 | for (k = 0; k < ARRAY_SIZE(vp9_ctx->cnts.coeff[i][0]); ++k) |
154 | for (l = 0; l < ARRAY_SIZE(vp9_ctx->cnts.coeff[i][0][0]); ++l) |
155 | INNER_LOOP; |
156 | } |
157 | |
158 | int hantro_vp9_dec_init(struct hantro_ctx *ctx) |
159 | { |
160 | struct hantro_dev *vpu = ctx->dev; |
161 | const struct hantro_variant *variant = vpu->variant; |
162 | struct hantro_vp9_dec_hw_ctx *vp9_dec = &ctx->vp9_dec; |
163 | struct hantro_aux_buf *tile_edge = &vp9_dec->tile_edge; |
164 | struct hantro_aux_buf *segment_map = &vp9_dec->segment_map; |
165 | struct hantro_aux_buf *misc = &vp9_dec->misc; |
166 | u32 i, max_width, max_height, size; |
167 | |
168 | if (variant->num_dec_fmts < 1) |
169 | return -EINVAL; |
170 | |
171 | for (i = 0; i < variant->num_dec_fmts; ++i) |
172 | if (variant->dec_fmts[i].fourcc == V4L2_PIX_FMT_VP9_FRAME) |
173 | break; |
174 | |
175 | if (i == variant->num_dec_fmts) |
176 | return -EINVAL; |
177 | |
178 | max_width = vpu->variant->dec_fmts[i].frmsize.max_width; |
179 | max_height = vpu->variant->dec_fmts[i].frmsize.max_height; |
180 | |
181 | size = hantro_vp9_tile_filter_size(height: max_height); |
182 | vp9_dec->bsd_ctrl_offset = size; |
183 | size += hantro_vp9_bsd_control_size(height: max_height); |
184 | |
185 | tile_edge->cpu = dma_alloc_coherent(dev: vpu->dev, size, dma_handle: &tile_edge->dma, GFP_KERNEL); |
186 | if (!tile_edge->cpu) |
187 | return -ENOMEM; |
188 | |
189 | tile_edge->size = size; |
190 | memset(tile_edge->cpu, 0, size); |
191 | |
192 | size = hantro_vp9_segment_map_size(width: max_width, height: max_height); |
193 | vp9_dec->segment_map_size = size; |
194 | size *= 2; /* we need two areas of this size, used alternately */ |
195 | |
196 | segment_map->cpu = dma_alloc_coherent(dev: vpu->dev, size, dma_handle: &segment_map->dma, GFP_KERNEL); |
197 | if (!segment_map->cpu) |
198 | goto err_segment_map; |
199 | |
200 | segment_map->size = size; |
201 | memset(segment_map->cpu, 0, size); |
202 | |
203 | size = hantro_vp9_prob_tab_size(); |
204 | vp9_dec->ctx_counters_offset = size; |
205 | size += hantro_vp9_count_tab_size(); |
206 | vp9_dec->tile_info_offset = size; |
207 | size += hantro_vp9_tile_info_size(); |
208 | |
209 | misc->cpu = dma_alloc_coherent(dev: vpu->dev, size, dma_handle: &misc->dma, GFP_KERNEL); |
210 | if (!misc->cpu) |
211 | goto err_misc; |
212 | |
213 | misc->size = size; |
214 | memset(misc->cpu, 0, size); |
215 | |
216 | init_v4l2_vp9_count_tbl(ctx); |
217 | |
218 | return 0; |
219 | |
220 | err_misc: |
221 | dma_free_coherent(dev: vpu->dev, size: segment_map->size, cpu_addr: segment_map->cpu, dma_handle: segment_map->dma); |
222 | |
223 | err_segment_map: |
224 | dma_free_coherent(dev: vpu->dev, size: tile_edge->size, cpu_addr: tile_edge->cpu, dma_handle: tile_edge->dma); |
225 | |
226 | return -ENOMEM; |
227 | } |
228 | |
229 | void hantro_vp9_dec_exit(struct hantro_ctx *ctx) |
230 | { |
231 | struct hantro_dev *vpu = ctx->dev; |
232 | struct hantro_vp9_dec_hw_ctx *vp9_dec = &ctx->vp9_dec; |
233 | struct hantro_aux_buf *tile_edge = &vp9_dec->tile_edge; |
234 | struct hantro_aux_buf *segment_map = &vp9_dec->segment_map; |
235 | struct hantro_aux_buf *misc = &vp9_dec->misc; |
236 | |
237 | dma_free_coherent(dev: vpu->dev, size: misc->size, cpu_addr: misc->cpu, dma_handle: misc->dma); |
238 | dma_free_coherent(dev: vpu->dev, size: segment_map->size, cpu_addr: segment_map->cpu, dma_handle: segment_map->dma); |
239 | dma_free_coherent(dev: vpu->dev, size: tile_edge->size, cpu_addr: tile_edge->cpu, dma_handle: tile_edge->dma); |
240 | } |
241 | |