// SPDX-License-Identifier: GPL-2.0
/*
 * Hantro G1 post-processor support
 *
 * Copyright (C) 2019 Collabora, Ltd.
 */
7 | |
8 | #include <linux/dma-mapping.h> |
9 | #include <linux/types.h> |
10 | |
11 | #include "hantro.h" |
12 | #include "hantro_hw.h" |
13 | #include "hantro_g1_regs.h" |
14 | #include "hantro_g2_regs.h" |
15 | #include "hantro_v4l2.h" |
16 | |
/*
 * Write @val to the G1 post-processor register field named @reg_name, i.e.
 * the member of hantro_g1_postproc_regs with that name, through
 * hantro_reg_write().
 *
 * Wrapped in do { } while (0) so the macro expands to exactly one statement
 * and stays safe in unbraced if/else bodies.
 */
#define HANTRO_PP_REG_WRITE(vpu, reg_name, val) \
	do { \
		hantro_reg_write(vpu, \
				 &hantro_g1_postproc_regs.reg_name, \
				 val); \
	} while (0)
23 | |
/*
 * Same as a plain post-processor field write, but issued through
 * hantro_reg_write_relaxed(): writes @val to the member of
 * hantro_g1_postproc_regs named @reg_name.
 *
 * Wrapped in do { } while (0) so the macro expands to exactly one statement
 * and stays safe in unbraced if/else bodies.
 */
#define HANTRO_PP_REG_WRITE_RELAXED(vpu, reg_name, val) \
	do { \
		hantro_reg_write_relaxed(vpu, \
					 &hantro_g1_postproc_regs.reg_name, \
					 val); \
	} while (0)
30 | |
/*
 * Pixel format selectors for the G1 post-processor: VPU_PP_IN_* values go
 * into the input_fmt field, VPU_PP_OUT_* values into the output_fmt field.
 *
 * NOTE(review): "YUV240" in VPU_PP_IN_YUV240_TILED looks like a typo for
 * "YUV420"; the name is kept so that existing references still resolve.
 */
#define VPU_PP_IN_YUYV			0x0
#define VPU_PP_IN_NV12			0x1
#define VPU_PP_IN_YUV420		0x2
#define VPU_PP_IN_YUV240_TILED		0x5

#define VPU_PP_OUT_RGB			0x0
#define VPU_PP_OUT_YUYV			0x3
37 | |
38 | static const struct hantro_postproc_regs hantro_g1_postproc_regs = { |
39 | .pipeline_en = {G1_REG_PP_INTERRUPT, 1, 0x1}, |
40 | .max_burst = {G1_REG_PP_DEV_CONFIG, 0, 0x1f}, |
41 | .clk_gate = {G1_REG_PP_DEV_CONFIG, 1, 0x1}, |
42 | .out_swap32 = {G1_REG_PP_DEV_CONFIG, 5, 0x1}, |
43 | .out_endian = {G1_REG_PP_DEV_CONFIG, 6, 0x1}, |
44 | .out_luma_base = {G1_REG_PP_OUT_LUMA_BASE, 0, 0xffffffff}, |
45 | .input_width = {G1_REG_PP_INPUT_SIZE, 0, 0x1ff}, |
46 | .input_height = {G1_REG_PP_INPUT_SIZE, 9, 0x1ff}, |
47 | .output_width = {G1_REG_PP_CONTROL, 4, 0x7ff}, |
48 | .output_height = {G1_REG_PP_CONTROL, 15, 0x7ff}, |
49 | .input_fmt = {G1_REG_PP_CONTROL, 29, 0x7}, |
50 | .output_fmt = {G1_REG_PP_CONTROL, 26, 0x7}, |
51 | .orig_width = {G1_REG_PP_MASK1_ORIG_WIDTH, 23, 0x1ff}, |
52 | .display_width = {G1_REG_PP_DISPLAY_WIDTH, 0, 0xfff}, |
53 | }; |
54 | |
55 | bool hantro_needs_postproc(const struct hantro_ctx *ctx, |
56 | const struct hantro_fmt *fmt) |
57 | { |
58 | if (ctx->is_encoder) |
59 | return false; |
60 | |
61 | if (ctx->need_postproc) |
62 | return true; |
63 | |
64 | return fmt->postprocessed; |
65 | } |
66 | |
67 | static void hantro_postproc_g1_enable(struct hantro_ctx *ctx) |
68 | { |
69 | struct hantro_dev *vpu = ctx->dev; |
70 | struct vb2_v4l2_buffer *dst_buf; |
71 | u32 src_pp_fmt, dst_pp_fmt; |
72 | dma_addr_t dst_dma; |
73 | |
74 | /* Turn on pipeline mode. Must be done first. */ |
75 | HANTRO_PP_REG_WRITE(vpu, pipeline_en, 0x1); |
76 | |
77 | src_pp_fmt = VPU_PP_IN_NV12; |
78 | |
79 | switch (ctx->vpu_dst_fmt->fourcc) { |
80 | case V4L2_PIX_FMT_YUYV: |
81 | dst_pp_fmt = VPU_PP_OUT_YUYV; |
82 | break; |
83 | default: |
84 | WARN(1, "output format %d not supported by the post-processor, this wasn't expected." , |
85 | ctx->vpu_dst_fmt->fourcc); |
86 | dst_pp_fmt = 0; |
87 | break; |
88 | } |
89 | |
90 | dst_buf = v4l2_m2m_next_dst_buf(m2m_ctx: ctx->fh.m2m_ctx); |
91 | dst_dma = vb2_dma_contig_plane_dma_addr(vb: &dst_buf->vb2_buf, plane_no: 0); |
92 | |
93 | HANTRO_PP_REG_WRITE(vpu, clk_gate, 0x1); |
94 | HANTRO_PP_REG_WRITE(vpu, out_endian, 0x1); |
95 | HANTRO_PP_REG_WRITE(vpu, out_swap32, 0x1); |
96 | HANTRO_PP_REG_WRITE(vpu, max_burst, 16); |
97 | HANTRO_PP_REG_WRITE(vpu, out_luma_base, dst_dma); |
98 | HANTRO_PP_REG_WRITE(vpu, input_width, MB_WIDTH(ctx->dst_fmt.width)); |
99 | HANTRO_PP_REG_WRITE(vpu, input_height, MB_HEIGHT(ctx->dst_fmt.height)); |
100 | HANTRO_PP_REG_WRITE(vpu, input_fmt, src_pp_fmt); |
101 | HANTRO_PP_REG_WRITE(vpu, output_fmt, dst_pp_fmt); |
102 | HANTRO_PP_REG_WRITE(vpu, output_width, ctx->dst_fmt.width); |
103 | HANTRO_PP_REG_WRITE(vpu, output_height, ctx->dst_fmt.height); |
104 | HANTRO_PP_REG_WRITE(vpu, orig_width, MB_WIDTH(ctx->dst_fmt.width)); |
105 | HANTRO_PP_REG_WRITE(vpu, display_width, ctx->dst_fmt.width); |
106 | } |
107 | |
108 | static int down_scale_factor(struct hantro_ctx *ctx) |
109 | { |
110 | if (ctx->src_fmt.width <= ctx->dst_fmt.width) |
111 | return 0; |
112 | |
113 | return DIV_ROUND_CLOSEST(ctx->src_fmt.width, ctx->dst_fmt.width); |
114 | } |
115 | |
116 | static void hantro_postproc_g2_enable(struct hantro_ctx *ctx) |
117 | { |
118 | struct hantro_dev *vpu = ctx->dev; |
119 | struct vb2_v4l2_buffer *dst_buf; |
120 | int down_scale = down_scale_factor(ctx); |
121 | int out_depth; |
122 | size_t chroma_offset; |
123 | dma_addr_t dst_dma; |
124 | |
125 | dst_buf = hantro_get_dst_buf(ctx); |
126 | dst_dma = vb2_dma_contig_plane_dma_addr(vb: &dst_buf->vb2_buf, plane_no: 0); |
127 | chroma_offset = ctx->dst_fmt.plane_fmt[0].bytesperline * |
128 | ctx->dst_fmt.height; |
129 | |
130 | if (down_scale) { |
131 | hantro_reg_write(vpu, reg: &g2_down_scale_e, val: 1); |
132 | hantro_reg_write(vpu, reg: &g2_down_scale_y, val: down_scale >> 2); |
133 | hantro_reg_write(vpu, reg: &g2_down_scale_x, val: down_scale >> 2); |
134 | hantro_write_addr(vpu, G2_DS_DST, addr: dst_dma); |
135 | hantro_write_addr(vpu, G2_DS_DST_CHR, addr: dst_dma + (chroma_offset >> down_scale)); |
136 | } else { |
137 | hantro_write_addr(vpu, G2_RS_OUT_LUMA_ADDR, addr: dst_dma); |
138 | hantro_write_addr(vpu, G2_RS_OUT_CHROMA_ADDR, addr: dst_dma + chroma_offset); |
139 | } |
140 | |
141 | out_depth = hantro_get_format_depth(fourcc: ctx->dst_fmt.pixelformat); |
142 | if (ctx->dev->variant->legacy_regs) { |
143 | u8 pp_shift = 0; |
144 | |
145 | if (out_depth > 8) |
146 | pp_shift = 16 - out_depth; |
147 | |
148 | hantro_reg_write(vpu: ctx->dev, reg: &g2_rs_out_bit_depth, val: out_depth); |
149 | hantro_reg_write(vpu: ctx->dev, reg: &g2_pp_pix_shift, val: pp_shift); |
150 | } else { |
151 | hantro_reg_write(vpu, reg: &g2_output_8_bits, val: out_depth > 8 ? 0 : 1); |
152 | hantro_reg_write(vpu, reg: &g2_output_format, val: out_depth > 8 ? 1 : 0); |
153 | } |
154 | hantro_reg_write(vpu, reg: &g2_out_rs_e, val: 1); |
155 | } |
156 | |
157 | static int hantro_postproc_g2_enum_framesizes(struct hantro_ctx *ctx, |
158 | struct v4l2_frmsizeenum *fsize) |
159 | { |
160 | /** |
161 | * G2 scaler can scale down by 0, 2, 4 or 8 |
162 | * use fsize->index has power of 2 diviser |
163 | **/ |
164 | if (fsize->index > 3) |
165 | return -EINVAL; |
166 | |
167 | if (!ctx->src_fmt.width || !ctx->src_fmt.height) |
168 | return -EINVAL; |
169 | |
170 | fsize->type = V4L2_FRMSIZE_TYPE_DISCRETE; |
171 | fsize->discrete.width = ctx->src_fmt.width >> fsize->index; |
172 | fsize->discrete.height = ctx->src_fmt.height >> fsize->index; |
173 | |
174 | return 0; |
175 | } |
176 | |
177 | void hantro_postproc_free(struct hantro_ctx *ctx) |
178 | { |
179 | struct hantro_dev *vpu = ctx->dev; |
180 | struct v4l2_m2m_ctx *m2m_ctx = ctx->fh.m2m_ctx; |
181 | struct vb2_queue *queue = &m2m_ctx->cap_q_ctx.q; |
182 | unsigned int i; |
183 | |
184 | for (i = 0; i < queue->max_num_buffers; ++i) { |
185 | struct hantro_aux_buf *priv = &ctx->postproc.dec_q[i]; |
186 | |
187 | if (priv->cpu) { |
188 | dma_free_attrs(dev: vpu->dev, size: priv->size, cpu_addr: priv->cpu, |
189 | dma_handle: priv->dma, attrs: priv->attrs); |
190 | priv->cpu = NULL; |
191 | } |
192 | } |
193 | } |
194 | |
195 | static unsigned int hantro_postproc_buffer_size(struct hantro_ctx *ctx) |
196 | { |
197 | struct v4l2_pix_format_mplane pix_mp; |
198 | const struct hantro_fmt *fmt; |
199 | unsigned int buf_size; |
200 | |
201 | /* this should always pick native format */ |
202 | fmt = hantro_get_default_fmt(ctx, bitstream: false, bit_depth: ctx->bit_depth, HANTRO_AUTO_POSTPROC); |
203 | if (!fmt) |
204 | return 0; |
205 | |
206 | v4l2_fill_pixfmt_mp(pixfmt: &pix_mp, pixelformat: fmt->fourcc, width: ctx->src_fmt.width, |
207 | height: ctx->src_fmt.height); |
208 | |
209 | buf_size = pix_mp.plane_fmt[0].sizeimage; |
210 | if (ctx->vpu_src_fmt->fourcc == V4L2_PIX_FMT_H264_SLICE) |
211 | buf_size += hantro_h264_mv_size(width: pix_mp.width, |
212 | height: pix_mp.height); |
213 | else if (ctx->vpu_src_fmt->fourcc == V4L2_PIX_FMT_VP9_FRAME) |
214 | buf_size += hantro_vp9_mv_size(width: pix_mp.width, |
215 | height: pix_mp.height); |
216 | else if (ctx->vpu_src_fmt->fourcc == V4L2_PIX_FMT_HEVC_SLICE) |
217 | buf_size += hantro_hevc_mv_size(width: pix_mp.width, |
218 | height: pix_mp.height); |
219 | else if (ctx->vpu_src_fmt->fourcc == V4L2_PIX_FMT_AV1_FRAME) |
220 | buf_size += hantro_av1_mv_size(width: pix_mp.width, |
221 | height: pix_mp.height); |
222 | |
223 | return buf_size; |
224 | } |
225 | |
226 | static int hantro_postproc_alloc(struct hantro_ctx *ctx, int index) |
227 | { |
228 | struct hantro_dev *vpu = ctx->dev; |
229 | struct hantro_aux_buf *priv = &ctx->postproc.dec_q[index]; |
230 | unsigned int buf_size = hantro_postproc_buffer_size(ctx); |
231 | |
232 | if (!buf_size) |
233 | return -EINVAL; |
234 | |
235 | /* |
236 | * The buffers on this queue are meant as intermediate |
237 | * buffers for the decoder, so no mapping is needed. |
238 | */ |
239 | priv->attrs = DMA_ATTR_NO_KERNEL_MAPPING; |
240 | priv->cpu = dma_alloc_attrs(dev: vpu->dev, size: buf_size, dma_handle: &priv->dma, |
241 | GFP_KERNEL, attrs: priv->attrs); |
242 | if (!priv->cpu) |
243 | return -ENOMEM; |
244 | priv->size = buf_size; |
245 | |
246 | return 0; |
247 | } |
248 | |
249 | int hantro_postproc_init(struct hantro_ctx *ctx) |
250 | { |
251 | struct v4l2_m2m_ctx *m2m_ctx = ctx->fh.m2m_ctx; |
252 | struct vb2_queue *cap_queue = &m2m_ctx->cap_q_ctx.q; |
253 | unsigned int num_buffers = vb2_get_num_buffers(q: cap_queue); |
254 | unsigned int i; |
255 | int ret; |
256 | |
257 | for (i = 0; i < num_buffers; i++) { |
258 | ret = hantro_postproc_alloc(ctx, index: i); |
259 | if (ret) |
260 | return ret; |
261 | } |
262 | |
263 | return 0; |
264 | } |
265 | |
266 | dma_addr_t |
267 | hantro_postproc_get_dec_buf_addr(struct hantro_ctx *ctx, int index) |
268 | { |
269 | struct hantro_aux_buf *priv = &ctx->postproc.dec_q[index]; |
270 | unsigned int buf_size = hantro_postproc_buffer_size(ctx); |
271 | struct hantro_dev *vpu = ctx->dev; |
272 | int ret; |
273 | |
274 | if (priv->size < buf_size && priv->cpu) { |
275 | /* buffer is too small, release it */ |
276 | dma_free_attrs(dev: vpu->dev, size: priv->size, cpu_addr: priv->cpu, |
277 | dma_handle: priv->dma, attrs: priv->attrs); |
278 | priv->cpu = NULL; |
279 | } |
280 | |
281 | if (!priv->cpu) { |
282 | /* buffer not already allocated, try getting a new one */ |
283 | ret = hantro_postproc_alloc(ctx, index); |
284 | if (ret) |
285 | return 0; |
286 | } |
287 | |
288 | if (!priv->cpu) |
289 | return 0; |
290 | |
291 | return priv->dma; |
292 | } |
293 | |
294 | static void hantro_postproc_g1_disable(struct hantro_ctx *ctx) |
295 | { |
296 | struct hantro_dev *vpu = ctx->dev; |
297 | |
298 | HANTRO_PP_REG_WRITE(vpu, pipeline_en, 0x0); |
299 | } |
300 | |
301 | static void hantro_postproc_g2_disable(struct hantro_ctx *ctx) |
302 | { |
303 | struct hantro_dev *vpu = ctx->dev; |
304 | |
305 | hantro_reg_write(vpu, reg: &g2_out_rs_e, val: 0); |
306 | } |
307 | |
308 | void hantro_postproc_disable(struct hantro_ctx *ctx) |
309 | { |
310 | struct hantro_dev *vpu = ctx->dev; |
311 | |
312 | if (vpu->variant->postproc_ops && vpu->variant->postproc_ops->disable) |
313 | vpu->variant->postproc_ops->disable(ctx); |
314 | } |
315 | |
316 | void hantro_postproc_enable(struct hantro_ctx *ctx) |
317 | { |
318 | struct hantro_dev *vpu = ctx->dev; |
319 | |
320 | if (vpu->variant->postproc_ops && vpu->variant->postproc_ops->enable) |
321 | vpu->variant->postproc_ops->enable(ctx); |
322 | } |
323 | |
324 | int hanto_postproc_enum_framesizes(struct hantro_ctx *ctx, |
325 | struct v4l2_frmsizeenum *fsize) |
326 | { |
327 | struct hantro_dev *vpu = ctx->dev; |
328 | |
329 | if (vpu->variant->postproc_ops && vpu->variant->postproc_ops->enum_framesizes) |
330 | return vpu->variant->postproc_ops->enum_framesizes(ctx, fsize); |
331 | |
332 | return -EINVAL; |
333 | } |
334 | |
335 | const struct hantro_postproc_ops hantro_g1_postproc_ops = { |
336 | .enable = hantro_postproc_g1_enable, |
337 | .disable = hantro_postproc_g1_disable, |
338 | }; |
339 | |
340 | const struct hantro_postproc_ops hantro_g2_postproc_ops = { |
341 | .enable = hantro_postproc_g2_enable, |
342 | .disable = hantro_postproc_g2_disable, |
343 | .enum_framesizes = hantro_postproc_g2_enum_framesizes, |
344 | }; |
345 | |