1// SPDX-License-Identifier: GPL-2.0
2/*
3 * Hantro VPU codec driver
4 *
5 * Copyright (C) 2018 Rockchip Electronics Co., Ltd.
6 *
7 * JPEG encoder
8 * ------------
9 * The VPU JPEG encoder produces JPEG baseline sequential format.
10 * The quantization coefficients are 8-bit values, complying with
11 * the baseline specification. Therefore, it requires
12 * luma and chroma quantization tables. The hardware does entropy
13 * encoding using internal Huffman tables, as specified in the JPEG
14 * specification.
15 *
16 * In other words, only the luma and chroma quantization tables are
17 * required for the encoding operation.
18 *
19 * Quantization luma table values are written to registers
20 * VEPU_swreg_0-VEPU_swreg_15, and chroma table values to
21 * VEPU_swreg_16-VEPU_swreg_31. A special order is needed, neither
22 * zigzag, nor linear.
23 */
24
25#include <asm/unaligned.h>
26#include <media/v4l2-mem2mem.h>
27#include "hantro_jpeg.h"
28#include "hantro.h"
29#include "hantro_v4l2.h"
30#include "hantro_hw.h"
31#include "rockchip_vpu2_regs.h"
32
/*
 * Number of 32-bit register writes issued per quantization table: each
 * table is consumed as this many big-endian 32-bit words (64 bytes).
 */
#define VEPU_JPEG_QUANT_TABLE_COUNT 16
34
35static void rockchip_vpu2_set_src_img_ctrl(struct hantro_dev *vpu,
36 struct hantro_ctx *ctx)
37{
38 u32 overfill_r, overfill_b;
39 u32 reg;
40
41 /*
42 * The format width and height are already macroblock aligned
43 * by .vidioc_s_fmt_vid_cap_mplane() callback. Destination
44 * format width and height can be further modified by
45 * .vidioc_s_selection(), and the width is 4-aligned.
46 */
47 overfill_r = ctx->src_fmt.width - ctx->dst_fmt.width;
48 overfill_b = ctx->src_fmt.height - ctx->dst_fmt.height;
49
50 reg = VEPU_REG_IN_IMG_CTRL_ROW_LEN(ctx->src_fmt.width);
51 vepu_write_relaxed(vpu, val: reg, VEPU_REG_INPUT_LUMA_INFO);
52
53 reg = VEPU_REG_IN_IMG_CTRL_OVRFLR_D4(overfill_r / 4) |
54 VEPU_REG_IN_IMG_CTRL_OVRFLB(overfill_b);
55 /*
56 * This register controls the input crop, as the offset
57 * from the right/bottom within the last macroblock. The offset from the
58 * right must be divided by 4 and so the crop must be aligned to 4 pixels
59 * horizontally.
60 */
61 vepu_write_relaxed(vpu, val: reg, VEPU_REG_ENC_OVER_FILL_STRM_OFFSET);
62
63 reg = VEPU_REG_IN_IMG_CTRL_FMT(ctx->vpu_src_fmt->enc_fmt);
64 vepu_write_relaxed(vpu, val: reg, VEPU_REG_ENC_CTRL1);
65}
66
67static void rockchip_vpu2_jpeg_enc_set_buffers(struct hantro_dev *vpu,
68 struct hantro_ctx *ctx,
69 struct vb2_buffer *src_buf,
70 struct vb2_buffer *dst_buf)
71{
72 struct v4l2_pix_format_mplane *pix_fmt = &ctx->src_fmt;
73 dma_addr_t src[3];
74 u32 size_left;
75
76 size_left = vb2_plane_size(vb: dst_buf, plane_no: 0) - ctx->vpu_dst_fmt->header_size;
77 if (WARN_ON(vb2_plane_size(dst_buf, 0) < ctx->vpu_dst_fmt->header_size))
78 size_left = 0;
79
80 WARN_ON(pix_fmt->num_planes > 3);
81
82 vepu_write_relaxed(vpu, val: vb2_dma_contig_plane_dma_addr(vb: dst_buf, plane_no: 0) +
83 ctx->vpu_dst_fmt->header_size,
84 VEPU_REG_ADDR_OUTPUT_STREAM);
85 vepu_write_relaxed(vpu, val: size_left, VEPU_REG_STR_BUF_LIMIT);
86
87 if (pix_fmt->num_planes == 1) {
88 src[0] = vb2_dma_contig_plane_dma_addr(vb: src_buf, plane_no: 0);
89 vepu_write_relaxed(vpu, val: src[0], VEPU_REG_ADDR_IN_PLANE_0);
90 } else if (pix_fmt->num_planes == 2) {
91 src[0] = vb2_dma_contig_plane_dma_addr(vb: src_buf, plane_no: 0);
92 src[1] = vb2_dma_contig_plane_dma_addr(vb: src_buf, plane_no: 1);
93 vepu_write_relaxed(vpu, val: src[0], VEPU_REG_ADDR_IN_PLANE_0);
94 vepu_write_relaxed(vpu, val: src[1], VEPU_REG_ADDR_IN_PLANE_1);
95 } else {
96 src[0] = vb2_dma_contig_plane_dma_addr(vb: src_buf, plane_no: 0);
97 src[1] = vb2_dma_contig_plane_dma_addr(vb: src_buf, plane_no: 1);
98 src[2] = vb2_dma_contig_plane_dma_addr(vb: src_buf, plane_no: 2);
99 vepu_write_relaxed(vpu, val: src[0], VEPU_REG_ADDR_IN_PLANE_0);
100 vepu_write_relaxed(vpu, val: src[1], VEPU_REG_ADDR_IN_PLANE_1);
101 vepu_write_relaxed(vpu, val: src[2], VEPU_REG_ADDR_IN_PLANE_2);
102 }
103}
104
105static void
106rockchip_vpu2_jpeg_enc_set_qtable(struct hantro_dev *vpu,
107 unsigned char *luma_qtable,
108 unsigned char *chroma_qtable)
109{
110 u32 reg, i;
111 __be32 *luma_qtable_p;
112 __be32 *chroma_qtable_p;
113
114 luma_qtable_p = (__be32 *)luma_qtable;
115 chroma_qtable_p = (__be32 *)chroma_qtable;
116
117 /*
118 * Quantization table registers must be written in contiguous blocks.
119 * DO NOT collapse the below two "for" loops into one.
120 */
121 for (i = 0; i < VEPU_JPEG_QUANT_TABLE_COUNT; i++) {
122 reg = get_unaligned_be32(p: &luma_qtable_p[i]);
123 vepu_write_relaxed(vpu, val: reg, VEPU_REG_JPEG_LUMA_QUAT(i));
124 }
125
126 for (i = 0; i < VEPU_JPEG_QUANT_TABLE_COUNT; i++) {
127 reg = get_unaligned_be32(p: &chroma_qtable_p[i]);
128 vepu_write_relaxed(vpu, val: reg, VEPU_REG_JPEG_CHROMA_QUAT(i));
129 }
130}
131
132int rockchip_vpu2_jpeg_enc_run(struct hantro_ctx *ctx)
133{
134 struct hantro_dev *vpu = ctx->dev;
135 struct vb2_v4l2_buffer *src_buf, *dst_buf;
136 struct hantro_jpeg_ctx jpeg_ctx;
137 u32 reg;
138
139 src_buf = hantro_get_src_buf(ctx);
140 dst_buf = hantro_get_dst_buf(ctx);
141
142 hantro_start_prepare_run(ctx);
143
144 memset(&jpeg_ctx, 0, sizeof(jpeg_ctx));
145 jpeg_ctx.buffer = vb2_plane_vaddr(vb: &dst_buf->vb2_buf, plane_no: 0);
146 if (!jpeg_ctx.buffer)
147 return -ENOMEM;
148
149 jpeg_ctx.width = ctx->dst_fmt.width;
150 jpeg_ctx.height = ctx->dst_fmt.height;
151 jpeg_ctx.quality = ctx->jpeg_quality;
152 hantro_jpeg_header_assemble(ctx: &jpeg_ctx);
153
154 /* Switch to JPEG encoder mode before writing registers */
155 vepu_write_relaxed(vpu, VEPU_REG_ENCODE_FORMAT_JPEG,
156 VEPU_REG_ENCODE_START);
157
158 rockchip_vpu2_set_src_img_ctrl(vpu, ctx);
159 rockchip_vpu2_jpeg_enc_set_buffers(vpu, ctx, src_buf: &src_buf->vb2_buf,
160 dst_buf: &dst_buf->vb2_buf);
161 rockchip_vpu2_jpeg_enc_set_qtable(vpu, luma_qtable: jpeg_ctx.hw_luma_qtable,
162 chroma_qtable: jpeg_ctx.hw_chroma_qtable);
163
164 reg = VEPU_REG_OUTPUT_SWAP32
165 | VEPU_REG_OUTPUT_SWAP16
166 | VEPU_REG_OUTPUT_SWAP8
167 | VEPU_REG_INPUT_SWAP8
168 | VEPU_REG_INPUT_SWAP16
169 | VEPU_REG_INPUT_SWAP32;
170 /* Make sure that all registers are written at this point. */
171 vepu_write(vpu, val: reg, VEPU_REG_DATA_ENDIAN);
172
173 reg = VEPU_REG_AXI_CTRL_BURST_LEN(16);
174 vepu_write_relaxed(vpu, val: reg, VEPU_REG_AXI_CTRL);
175
176 reg = VEPU_REG_MB_WIDTH(MB_WIDTH(ctx->src_fmt.width))
177 | VEPU_REG_MB_HEIGHT(MB_HEIGHT(ctx->src_fmt.height))
178 | VEPU_REG_FRAME_TYPE_INTRA
179 | VEPU_REG_ENCODE_FORMAT_JPEG
180 | VEPU_REG_ENCODE_ENABLE;
181
182 /* Kick the watchdog and start encoding */
183 hantro_end_prepare_run(ctx);
184 vepu_write(vpu, val: reg, VEPU_REG_ENCODE_START);
185
186 return 0;
187}
188
189void rockchip_vpu2_jpeg_enc_done(struct hantro_ctx *ctx)
190{
191 struct hantro_dev *vpu = ctx->dev;
192 u32 bytesused = vepu_read(vpu, VEPU_REG_STR_BUF_LIMIT) / 8;
193 struct vb2_v4l2_buffer *dst_buf = hantro_get_dst_buf(ctx);
194
195 vb2_set_plane_payload(vb: &dst_buf->vb2_buf, plane_no: 0,
196 size: ctx->vpu_dst_fmt->header_size + bytesused);
197}
198

/* Source: linux/drivers/media/platform/verisilicon/rockchip_vpu2_hw_jpeg_enc.c */