// SPDX-License-Identifier: GPL-2.0
/*
 * Rockchip RK3288 VPU codec driver
 *
 * Copyright (c) 2014 Rockchip Electronics Co., Ltd.
 *	Hertz Wong <hertz.wong@rock-chips.com>
 *	Herman Chen <herman.chen@rock-chips.com>
 *
 * Copyright (C) 2014 Google, Inc.
 *	Tomasz Figa <tfiga@chromium.org>
 */
12 | |
13 | #include <linux/types.h> |
14 | #include <linux/sort.h> |
15 | |
16 | #include <media/v4l2-mem2mem.h> |
17 | |
18 | #include "hantro_g1_regs.h" |
19 | #include "hantro_hw.h" |
20 | #include "hantro_v4l2.h" |
21 | |
22 | static void set_params(struct hantro_ctx *ctx, struct vb2_v4l2_buffer *src_buf) |
23 | { |
24 | const struct hantro_h264_dec_ctrls *ctrls = &ctx->h264_dec.ctrls; |
25 | const struct v4l2_ctrl_h264_decode_params *dec_param = ctrls->decode; |
26 | const struct v4l2_ctrl_h264_sps *sps = ctrls->sps; |
27 | const struct v4l2_ctrl_h264_pps *pps = ctrls->pps; |
28 | struct hantro_dev *vpu = ctx->dev; |
29 | u32 reg; |
30 | |
31 | /* Decoder control register 0. */ |
32 | reg = G1_REG_DEC_CTRL0_DEC_AXI_AUTO; |
33 | if (sps->flags & V4L2_H264_SPS_FLAG_MB_ADAPTIVE_FRAME_FIELD) |
34 | reg |= G1_REG_DEC_CTRL0_SEQ_MBAFF_E; |
35 | if (sps->profile_idc > 66) { |
36 | reg |= G1_REG_DEC_CTRL0_PICORD_COUNT_E; |
37 | if (dec_param->nal_ref_idc) |
38 | reg |= G1_REG_DEC_CTRL0_WRITE_MVS_E; |
39 | } |
40 | |
41 | if (!(sps->flags & V4L2_H264_SPS_FLAG_FRAME_MBS_ONLY) && |
42 | (sps->flags & V4L2_H264_SPS_FLAG_MB_ADAPTIVE_FRAME_FIELD || |
43 | dec_param->flags & V4L2_H264_DECODE_PARAM_FLAG_FIELD_PIC)) |
44 | reg |= G1_REG_DEC_CTRL0_PIC_INTERLACE_E; |
45 | if (dec_param->flags & V4L2_H264_DECODE_PARAM_FLAG_FIELD_PIC) |
46 | reg |= G1_REG_DEC_CTRL0_PIC_FIELDMODE_E; |
47 | if (!(dec_param->flags & V4L2_H264_DECODE_PARAM_FLAG_BOTTOM_FIELD)) |
48 | reg |= G1_REG_DEC_CTRL0_PIC_TOPFIELD_E; |
49 | vdpu_write_relaxed(vpu, val: reg, G1_REG_DEC_CTRL0); |
50 | |
51 | /* Decoder control register 1. */ |
52 | reg = G1_REG_DEC_CTRL1_PIC_MB_WIDTH(MB_WIDTH(ctx->src_fmt.width)) | |
53 | G1_REG_DEC_CTRL1_PIC_MB_HEIGHT_P(MB_HEIGHT(ctx->src_fmt.height)) | |
54 | G1_REG_DEC_CTRL1_REF_FRAMES(sps->max_num_ref_frames); |
55 | vdpu_write_relaxed(vpu, val: reg, G1_REG_DEC_CTRL1); |
56 | |
57 | /* Decoder control register 2. */ |
58 | reg = G1_REG_DEC_CTRL2_CH_QP_OFFSET(pps->chroma_qp_index_offset) | |
59 | G1_REG_DEC_CTRL2_CH_QP_OFFSET2(pps->second_chroma_qp_index_offset); |
60 | |
61 | if (pps->flags & V4L2_H264_PPS_FLAG_SCALING_MATRIX_PRESENT) |
62 | reg |= G1_REG_DEC_CTRL2_TYPE1_QUANT_E; |
63 | if (!(sps->flags & V4L2_H264_SPS_FLAG_FRAME_MBS_ONLY)) |
64 | reg |= G1_REG_DEC_CTRL2_FIELDPIC_FLAG_E; |
65 | vdpu_write_relaxed(vpu, val: reg, G1_REG_DEC_CTRL2); |
66 | |
67 | /* Decoder control register 3. */ |
68 | reg = G1_REG_DEC_CTRL3_START_CODE_E | |
69 | G1_REG_DEC_CTRL3_INIT_QP(pps->pic_init_qp_minus26 + 26) | |
70 | G1_REG_DEC_CTRL3_STREAM_LEN(vb2_get_plane_payload(&src_buf->vb2_buf, 0)); |
71 | vdpu_write_relaxed(vpu, val: reg, G1_REG_DEC_CTRL3); |
72 | |
73 | /* Decoder control register 4. */ |
74 | reg = G1_REG_DEC_CTRL4_FRAMENUM_LEN(sps->log2_max_frame_num_minus4 + 4) | |
75 | G1_REG_DEC_CTRL4_FRAMENUM(dec_param->frame_num) | |
76 | G1_REG_DEC_CTRL4_WEIGHT_BIPR_IDC(pps->weighted_bipred_idc); |
77 | if (pps->flags & V4L2_H264_PPS_FLAG_ENTROPY_CODING_MODE) |
78 | reg |= G1_REG_DEC_CTRL4_CABAC_E; |
79 | if (sps->flags & V4L2_H264_SPS_FLAG_DIRECT_8X8_INFERENCE) |
80 | reg |= G1_REG_DEC_CTRL4_DIR_8X8_INFER_E; |
81 | if (sps->profile_idc >= 100 && sps->chroma_format_idc == 0) |
82 | reg |= G1_REG_DEC_CTRL4_BLACKWHITE_E; |
83 | if (pps->flags & V4L2_H264_PPS_FLAG_WEIGHTED_PRED) |
84 | reg |= G1_REG_DEC_CTRL4_WEIGHT_PRED_E; |
85 | vdpu_write_relaxed(vpu, val: reg, G1_REG_DEC_CTRL4); |
86 | |
87 | /* Decoder control register 5. */ |
88 | reg = G1_REG_DEC_CTRL5_REFPIC_MK_LEN(dec_param->dec_ref_pic_marking_bit_size) | |
89 | G1_REG_DEC_CTRL5_IDR_PIC_ID(dec_param->idr_pic_id); |
90 | if (pps->flags & V4L2_H264_PPS_FLAG_CONSTRAINED_INTRA_PRED) |
91 | reg |= G1_REG_DEC_CTRL5_CONST_INTRA_E; |
92 | if (pps->flags & V4L2_H264_PPS_FLAG_DEBLOCKING_FILTER_CONTROL_PRESENT) |
93 | reg |= G1_REG_DEC_CTRL5_FILT_CTRL_PRES; |
94 | if (pps->flags & V4L2_H264_PPS_FLAG_REDUNDANT_PIC_CNT_PRESENT) |
95 | reg |= G1_REG_DEC_CTRL5_RDPIC_CNT_PRES; |
96 | if (pps->flags & V4L2_H264_PPS_FLAG_TRANSFORM_8X8_MODE) |
97 | reg |= G1_REG_DEC_CTRL5_8X8TRANS_FLAG_E; |
98 | if (dec_param->flags & V4L2_H264_DECODE_PARAM_FLAG_IDR_PIC) |
99 | reg |= G1_REG_DEC_CTRL5_IDR_PIC_E; |
100 | vdpu_write_relaxed(vpu, val: reg, G1_REG_DEC_CTRL5); |
101 | |
102 | /* Decoder control register 6. */ |
103 | reg = G1_REG_DEC_CTRL6_PPS_ID(pps->pic_parameter_set_id) | |
104 | G1_REG_DEC_CTRL6_REFIDX0_ACTIVE(pps->num_ref_idx_l0_default_active_minus1 + 1) | |
105 | G1_REG_DEC_CTRL6_REFIDX1_ACTIVE(pps->num_ref_idx_l1_default_active_minus1 + 1) | |
106 | G1_REG_DEC_CTRL6_POC_LENGTH(dec_param->pic_order_cnt_bit_size); |
107 | vdpu_write_relaxed(vpu, val: reg, G1_REG_DEC_CTRL6); |
108 | |
109 | /* Error concealment register. */ |
110 | vdpu_write_relaxed(vpu, val: 0, G1_REG_ERR_CONC); |
111 | |
112 | /* Prediction filter tap register. */ |
113 | vdpu_write_relaxed(vpu, |
114 | G1_REG_PRED_FLT_PRED_BC_TAP_0_0(1) | |
115 | G1_REG_PRED_FLT_PRED_BC_TAP_0_1(-5 & 0x3ff) | |
116 | G1_REG_PRED_FLT_PRED_BC_TAP_0_2(20), |
117 | G1_REG_PRED_FLT); |
118 | |
119 | /* Reference picture buffer control register. */ |
120 | vdpu_write_relaxed(vpu, val: 0, G1_REG_REF_BUF_CTRL); |
121 | |
122 | /* Reference picture buffer control register 2. */ |
123 | vdpu_write_relaxed(vpu, G1_REG_REF_BUF_CTRL2_APF_THRESHOLD(8), |
124 | G1_REG_REF_BUF_CTRL2); |
125 | } |
126 | |
127 | static void set_ref(struct hantro_ctx *ctx) |
128 | { |
129 | const struct v4l2_h264_reference *b0_reflist, *b1_reflist, *p_reflist; |
130 | struct hantro_dev *vpu = ctx->dev; |
131 | int reg_num; |
132 | u32 reg; |
133 | int i; |
134 | |
135 | vdpu_write_relaxed(vpu, val: ctx->h264_dec.dpb_valid, G1_REG_VALID_REF); |
136 | vdpu_write_relaxed(vpu, val: ctx->h264_dec.dpb_longterm, G1_REG_LT_REF); |
137 | |
138 | /* |
139 | * Set up reference frame picture numbers. |
140 | * |
141 | * Each G1_REG_REF_PIC(x) register contains numbers of two |
142 | * subsequential reference pictures. |
143 | */ |
144 | for (i = 0; i < HANTRO_H264_DPB_SIZE; i += 2) { |
145 | reg = G1_REG_REF_PIC_REFER0_NBR(hantro_h264_get_ref_nbr(ctx, i)) | |
146 | G1_REG_REF_PIC_REFER1_NBR(hantro_h264_get_ref_nbr(ctx, i + 1)); |
147 | vdpu_write_relaxed(vpu, val: reg, G1_REG_REF_PIC(i / 2)); |
148 | } |
149 | |
150 | b0_reflist = ctx->h264_dec.reflists.b0; |
151 | b1_reflist = ctx->h264_dec.reflists.b1; |
152 | p_reflist = ctx->h264_dec.reflists.p; |
153 | |
154 | /* |
155 | * Each G1_REG_BD_REF_PIC(x) register contains three entries |
156 | * of each forward and backward picture list. |
157 | */ |
158 | reg_num = 0; |
159 | for (i = 0; i < 15; i += 3) { |
160 | reg = G1_REG_BD_REF_PIC_BINIT_RLIST_F0(b0_reflist[i].index) | |
161 | G1_REG_BD_REF_PIC_BINIT_RLIST_F1(b0_reflist[i + 1].index) | |
162 | G1_REG_BD_REF_PIC_BINIT_RLIST_F2(b0_reflist[i + 2].index) | |
163 | G1_REG_BD_REF_PIC_BINIT_RLIST_B0(b1_reflist[i].index) | |
164 | G1_REG_BD_REF_PIC_BINIT_RLIST_B1(b1_reflist[i + 1].index) | |
165 | G1_REG_BD_REF_PIC_BINIT_RLIST_B2(b1_reflist[i + 2].index); |
166 | vdpu_write_relaxed(vpu, val: reg, G1_REG_BD_REF_PIC(reg_num++)); |
167 | } |
168 | |
169 | /* |
170 | * G1_REG_BD_P_REF_PIC register contains last entries (index 15) |
171 | * of forward and backward reference picture lists and first 4 entries |
172 | * of P forward picture list. |
173 | */ |
174 | reg = G1_REG_BD_P_REF_PIC_BINIT_RLIST_F15(b0_reflist[15].index) | |
175 | G1_REG_BD_P_REF_PIC_BINIT_RLIST_B15(b1_reflist[15].index) | |
176 | G1_REG_BD_P_REF_PIC_PINIT_RLIST_F0(p_reflist[0].index) | |
177 | G1_REG_BD_P_REF_PIC_PINIT_RLIST_F1(p_reflist[1].index) | |
178 | G1_REG_BD_P_REF_PIC_PINIT_RLIST_F2(p_reflist[2].index) | |
179 | G1_REG_BD_P_REF_PIC_PINIT_RLIST_F3(p_reflist[3].index); |
180 | vdpu_write_relaxed(vpu, val: reg, G1_REG_BD_P_REF_PIC); |
181 | |
182 | /* |
183 | * Each G1_REG_FWD_PIC(x) register contains six consecutive |
184 | * entries of P forward picture list, starting from index 4. |
185 | */ |
186 | reg_num = 0; |
187 | for (i = 4; i < HANTRO_H264_DPB_SIZE; i += 6) { |
188 | reg = G1_REG_FWD_PIC_PINIT_RLIST_F0(p_reflist[i].index) | |
189 | G1_REG_FWD_PIC_PINIT_RLIST_F1(p_reflist[i + 1].index) | |
190 | G1_REG_FWD_PIC_PINIT_RLIST_F2(p_reflist[i + 2].index) | |
191 | G1_REG_FWD_PIC_PINIT_RLIST_F3(p_reflist[i + 3].index) | |
192 | G1_REG_FWD_PIC_PINIT_RLIST_F4(p_reflist[i + 4].index) | |
193 | G1_REG_FWD_PIC_PINIT_RLIST_F5(p_reflist[i + 5].index); |
194 | vdpu_write_relaxed(vpu, val: reg, G1_REG_FWD_PIC(reg_num++)); |
195 | } |
196 | |
197 | /* Set up addresses of DPB buffers. */ |
198 | for (i = 0; i < HANTRO_H264_DPB_SIZE; i++) { |
199 | dma_addr_t dma_addr = hantro_h264_get_ref_buf(ctx, dpb_idx: i); |
200 | |
201 | vdpu_write_relaxed(vpu, val: dma_addr, G1_REG_ADDR_REF(i)); |
202 | } |
203 | } |
204 | |
205 | static void set_buffers(struct hantro_ctx *ctx, struct vb2_v4l2_buffer *src_buf) |
206 | { |
207 | const struct hantro_h264_dec_ctrls *ctrls = &ctx->h264_dec.ctrls; |
208 | struct vb2_v4l2_buffer *dst_buf; |
209 | struct hantro_dev *vpu = ctx->dev; |
210 | dma_addr_t src_dma, dst_dma; |
211 | size_t offset = 0; |
212 | |
213 | /* Source (stream) buffer. */ |
214 | src_dma = vb2_dma_contig_plane_dma_addr(vb: &src_buf->vb2_buf, plane_no: 0); |
215 | vdpu_write_relaxed(vpu, val: src_dma, G1_REG_ADDR_STR); |
216 | |
217 | /* Destination (decoded frame) buffer. */ |
218 | dst_buf = hantro_get_dst_buf(ctx); |
219 | dst_dma = hantro_get_dec_buf_addr(ctx, vb: &dst_buf->vb2_buf); |
220 | /* Adjust dma addr to start at second line for bottom field */ |
221 | if (ctrls->decode->flags & V4L2_H264_DECODE_PARAM_FLAG_BOTTOM_FIELD) |
222 | offset = ALIGN(ctx->src_fmt.width, MB_DIM); |
223 | vdpu_write_relaxed(vpu, val: dst_dma + offset, G1_REG_ADDR_DST); |
224 | |
225 | /* Higher profiles require DMV buffer appended to reference frames. */ |
226 | if (ctrls->sps->profile_idc > 66 && ctrls->decode->nal_ref_idc) { |
227 | unsigned int bytes_per_mb = 384; |
228 | |
229 | /* DMV buffer for monochrome start directly after Y-plane */ |
230 | if (ctrls->sps->profile_idc >= 100 && |
231 | ctrls->sps->chroma_format_idc == 0) |
232 | bytes_per_mb = 256; |
233 | offset = bytes_per_mb * MB_WIDTH(ctx->src_fmt.width) * |
234 | MB_HEIGHT(ctx->src_fmt.height); |
235 | |
236 | /* |
237 | * DMV buffer is split in two for field encoded frames, |
238 | * adjust offset for bottom field |
239 | */ |
240 | if (ctrls->decode->flags & V4L2_H264_DECODE_PARAM_FLAG_BOTTOM_FIELD) |
241 | offset += 32 * MB_WIDTH(ctx->src_fmt.width) * |
242 | MB_HEIGHT(ctx->src_fmt.height); |
243 | vdpu_write_relaxed(vpu, val: dst_dma + offset, G1_REG_ADDR_DIR_MV); |
244 | } |
245 | |
246 | /* Auxiliary buffer prepared in hantro_h264_dec_init(). */ |
247 | vdpu_write_relaxed(vpu, val: ctx->h264_dec.priv.dma, G1_REG_ADDR_QTABLE); |
248 | } |
249 | |
250 | int hantro_g1_h264_dec_run(struct hantro_ctx *ctx) |
251 | { |
252 | struct hantro_dev *vpu = ctx->dev; |
253 | struct vb2_v4l2_buffer *src_buf; |
254 | int ret; |
255 | |
256 | /* Prepare the H264 decoder context. */ |
257 | ret = hantro_h264_dec_prepare_run(ctx); |
258 | if (ret) |
259 | return ret; |
260 | |
261 | /* Configure hardware registers. */ |
262 | src_buf = hantro_get_src_buf(ctx); |
263 | set_params(ctx, src_buf); |
264 | set_ref(ctx); |
265 | set_buffers(ctx, src_buf); |
266 | |
267 | hantro_end_prepare_run(ctx); |
268 | |
269 | /* Start decoding! */ |
270 | vdpu_write_relaxed(vpu, |
271 | G1_REG_CONFIG_DEC_AXI_RD_ID(0xffu) | |
272 | G1_REG_CONFIG_DEC_TIMEOUT_E | |
273 | G1_REG_CONFIG_DEC_OUT_ENDIAN | |
274 | G1_REG_CONFIG_DEC_STRENDIAN_E | |
275 | G1_REG_CONFIG_DEC_MAX_BURST(16) | |
276 | G1_REG_CONFIG_DEC_OUTSWAP32_E | |
277 | G1_REG_CONFIG_DEC_INSWAP32_E | |
278 | G1_REG_CONFIG_DEC_STRSWAP32_E | |
279 | G1_REG_CONFIG_DEC_CLK_GATE_E, |
280 | G1_REG_CONFIG); |
281 | vdpu_write(vpu, G1_REG_INTERRUPT_DEC_E, G1_REG_INTERRUPT); |
282 | |
283 | return 0; |
284 | } |
285 | |