/****************************************************************************
**
** Copyright (C) 2013 Imagination Technologies Limited, www.imgtec.com
** Contact: https://www.qt.io/licensing/
**
** This file is part of the QtGui module of the Qt Toolkit.
**
** $QT_BEGIN_LICENSE:LGPL$
** Commercial License Usage
** Licensees holding valid commercial Qt licenses may use this file in
** accordance with the commercial license agreement provided with the
** Software or, alternatively, in accordance with the terms contained in
** a written agreement between you and The Qt Company. For licensing terms
** and conditions see https://www.qt.io/terms-conditions. For further
** information use the contact form at https://www.qt.io/contact-us.
**
** GNU Lesser General Public License Usage
** Alternatively, this file may be used under the terms of the GNU Lesser
** General Public License version 3 as published by the Free Software
** Foundation and appearing in the file LICENSE.LGPL3 included in the
** packaging of this file. Please review the following information to
** ensure the GNU Lesser General Public License version 3 requirements
** will be met: https://www.gnu.org/licenses/lgpl-3.0.html.
**
** GNU General Public License Usage
** Alternatively, this file may be used under the terms of the GNU
** General Public License version 2.0 or (at your option) the GNU General
** Public license version 3 or any later version approved by the KDE Free
** Qt Foundation. The licenses are as published by the Free Software
** Foundation and appearing in the file LICENSE.GPL2 and LICENSE.GPL3
** included in the packaging of this file. Please review the following
** information to ensure the GNU General Public License requirements will
** be met: https://www.gnu.org/licenses/gpl-2.0.html and
** https://www.gnu.org/licenses/gpl-3.0.html.
**
** $QT_END_LICENSE$
**
****************************************************************************/

#include "../painting/qt_mips_asm_dsp_p.h"

LEAF_MIPS_DSPR2(premultiply_argb_inplace_mips_asm)

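/*
 * Premultiplies a block of 32-bit ARGB pixels by their alpha channel,
 * in place; the alpha byte itself is left unchanged.
 *
 * Parameters (inferred from the code below; the names are descriptive only):
 *   a0 - data   (*a8r8g8b8, modified in place)
 *   a1 - numlines
 *   a2 - linelen (pixels per line)
 *   a3 - lineskip (bytes from the end of one line to the start of the next)
 *
 * Rough C sketch of what the DSPR2 sequence computes, for reference only;
 * the function name and types are illustrative and not part of this file.
 * Each colour channel is scaled by (c*a + (c*a >> 8) + 0x80) >> 8, a
 * rounded approximation of c*a / 255:
 *
 *   static void premultiply_argb_inplace_c(uint32_t *data, int numlines,
 *                                          int linelen, int lineskip)
 *   {
 *       while (numlines-- > 0) {
 *           for (int x = 0; x < linelen; ++x) {
 *               uint32_t p = data[x], a = p >> 24;
 *               uint32_t r = ((p >> 16) & 0xff) * a;
 *               uint32_t g = ((p >>  8) & 0xff) * a;
 *               uint32_t b = ( p        & 0xff) * a;
 *               r = (r + (r >> 8) + 0x80) >> 8;
 *               g = (g + (g >> 8) + 0x80) >> 8;
 *               b = (b + (b >> 8) + 0x80) >> 8;
 *               data[x] = (a << 24) | (r << 16) | (g << 8) | b;
 *           }
 *           data = (uint32_t *)((unsigned char *)data + linelen * 4 + lineskip);
 *       }
 *   }
 */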
    SAVE_REGS_ON_STACK 0, s0, s1, s2, s3

3:  srl     v1, a2, 3            /* v1 = linelen / 8 */
    addiu   a1, a1, -1           /* numlines-- */
    beqz    v1, 1f               /* if (!(linelen / 8)): tail */
    andi    v0, a2, 0x7          /* v0 = linelen % 8 */
    pref    5, 0(a0)             /* cache-hint: store-streamed */

    /* unrolled loop, handles (v1 = len / 8) batches of 8 pixels */
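    /*
     * Per pixel below: muleu_s.ph.qbl/qbr form the 16-bit products
     * A*A|A*R and A*G|A*B, preceu.ph.qbla extracts the high byte of each
     * product, addu.ph and shra_r.ph then compute
     * (c*a + (c*a >> 8) + 0x80) >> 8, and precr.qb.ph/append repack the
     * bytes and restore the original alpha.
     */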
2:  addiu   v1, v1, -1
    pref    5, 0(a0)
    pref    5, 32(a0)

    lw      t0, 0(a0)
    lw      t1, 4(a0)
    lw      t2, 8(a0)
    lw      t3, 12(a0)
    srl     t4, t0, 24           /* 00|00|00|A1 */
    replv.ph t5, t4              /* 00|A1|00|A1 */
    srl     t6, t1, 24           /* 00|00|00|A2 */
    replv.ph t7, t6              /* 00|A2|00|A2 */
    muleu_s.ph.qbl t8, t0, t5    /* A1*A1|A1*R1 */
    muleu_s.ph.qbr t0, t0, t5    /* A1*G1|A1*B1 */
    muleu_s.ph.qbl t9, t1, t7    /* A2*A2|A2*R2 */
    muleu_s.ph.qbr t1, t1, t7    /* A2*G2|A2*B2 */
    srl     t5, t2, 24           /* 00|00|00|A3 */
    replv.ph s0, t5              /* 00|A3|00|A3 */
    srl     t7, t3, 24           /* 00|00|00|A4 */
    replv.ph s1, t7              /* 00|A4|00|A4 */
    muleu_s.ph.qbl s2, t2, s0    /* A3*A3|A3*R3 */
    muleu_s.ph.qbr t2, t2, s0    /* A3*G3|A3*B3 */
    muleu_s.ph.qbl s0, t3, s1    /* A4*A4|A4*R4 */
    muleu_s.ph.qbr t3, t3, s1    /* A4*G4|A4*B4 */
    preceu.ph.qbla s1, t8
    preceu.ph.qbla s3, t0
    addu.ph t8, t8, s1
    addu.ph t0, t0, s3
    preceu.ph.qbla s1, t9
    preceu.ph.qbla s3, t1
    addu.ph t9, t9, s1
    addu.ph t1, t1, s3
    preceu.ph.qbla s1, s2
    preceu.ph.qbla s3, t2
    addu.ph s2, s2, s1
    addu.ph t2, t2, s3
    preceu.ph.qbla s1, s0
    preceu.ph.qbla s3, t3
    addu.ph s0, s0, s1
    addu.ph t3, t3, s3
    shra_r.ph t8, t8, 8          /* xxAA1|xxRR1 */
    shra_r.ph t0, t0, 8          /* xxGG1|xxBB1 */
    shra_r.ph t9, t9, 8
    shra_r.ph t1, t1, 8
    shra_r.ph s2, s2, 8
    shra_r.ph t2, t2, 8
    shra_r.ph s0, s0, 8
    shra_r.ph t3, t3, 8
    precr.qb.ph t0, t8, t0
    precr.qb.ph t1, t9, t1
    precr.qb.ph t2, s2, t2
    precr.qb.ph t3, s0, t3
    append  t4, t0, 24
    append  t6, t1, 24
    append  t5, t2, 24
    append  t7, t3, 24
    sw      t4, 0(a0)
    sw      t6, 4(a0)
    sw      t5, 8(a0)
    sw      t7, 12(a0)

    lw      t0, 16(a0)
    lw      t1, 20(a0)
    lw      t2, 24(a0)
    lw      t3, 28(a0)
    srl     t4, t0, 24           /* 00|00|00|A1 */
    replv.ph t5, t4              /* 00|A1|00|A1 */
    srl     t6, t1, 24           /* 00|00|00|A2 */
    replv.ph t7, t6              /* 00|A2|00|A2 */
    muleu_s.ph.qbl t8, t0, t5    /* A1*A1|A1*R1 */
    muleu_s.ph.qbr t0, t0, t5    /* A1*G1|A1*B1 */
    muleu_s.ph.qbl t9, t1, t7    /* A2*A2|A2*R2 */
    muleu_s.ph.qbr t1, t1, t7    /* A2*G2|A2*B2 */
    srl     t5, t2, 24           /* 00|00|00|A3 */
    replv.ph s0, t5              /* 00|A3|00|A3 */
    srl     t7, t3, 24           /* 00|00|00|A4 */
    replv.ph s1, t7              /* 00|A4|00|A4 */
    muleu_s.ph.qbl s2, t2, s0    /* A3*A3|A3*R3 */
    muleu_s.ph.qbr t2, t2, s0    /* A3*G3|A3*B3 */
    muleu_s.ph.qbl s0, t3, s1    /* A4*A4|A4*R4 */
    muleu_s.ph.qbr t3, t3, s1    /* A4*G4|A4*B4 */
    preceu.ph.qbla s1, t8
    preceu.ph.qbla s3, t0
    addu.ph t8, t8, s1
    addu.ph t0, t0, s3
    preceu.ph.qbla s1, t9
    preceu.ph.qbla s3, t1
    addu.ph t9, t9, s1
    addu.ph t1, t1, s3
    preceu.ph.qbla s1, s2
    preceu.ph.qbla s3, t2
    addu.ph s2, s2, s1
    addu.ph t2, t2, s3
    preceu.ph.qbla s1, s0
    preceu.ph.qbla s3, t3
    addu.ph s0, s0, s1
    addu.ph t3, t3, s3
    shra_r.ph t8, t8, 8          /* xxAA1|xxRR1 */
    shra_r.ph t0, t0, 8          /* xxGG1|xxBB1 */
    shra_r.ph t9, t9, 8
    shra_r.ph t1, t1, 8
    shra_r.ph s2, s2, 8
    shra_r.ph t2, t2, 8
    shra_r.ph s0, s0, 8
    shra_r.ph t3, t3, 8
    precr.qb.ph t0, t8, t0
    precr.qb.ph t1, t9, t1
    precr.qb.ph t2, s2, t2
    precr.qb.ph t3, s0, t3
    append  t4, t0, 24
    append  t6, t1, 24
    append  t5, t2, 24
    append  t7, t3, 24
    sw      t4, 16(a0)
    sw      t6, 20(a0)
    sw      t5, 24(a0)
    sw      t7, 28(a0)
    bgtz    v1, 2b               /* if (v1): unrolled loop */
    addiu   a0, a0, 32           /* data += 8 pixels */

    beqz    v0, 4f               /* if (!v0): skip tail loop */
    nop

    /* tail loop, handles (len < 8), one pixel at a time */
1:  lw      t1, 0(a0)
    addiu   v0, v0, -1           /* len-- */
    srl     t2, t1, 24           /* t2 = alpha */
    replv.ph t3, t2
    muleu_s.ph.qbl t4, t1, t3
    muleu_s.ph.qbr t1, t1, t3
    preceu.ph.qbla t3, t4
    preceu.ph.qbla t5, t1
    addu.ph t4, t4, t3
    addu.ph t1, t1, t5
    shra_r.ph t4, t4, 8
    shra_r.ph t1, t1, 8
    precr.qb.ph t1, t4, t1
    append  t2, t1, 24
    sw      t2, 0(a0)
    bgtz    v0, 1b
    addiu   a0, a0, 4            /* data++ */

4:  bnez    a1, 3b               /* if (numlines): loop */
    addu    a0, a0, a3           /* data += lineskip */

0:  /* return */
    RESTORE_REGS_FROM_STACK 0, s0, s1, s2, s3

    jr      ra
    nop

END(premultiply_argb_inplace_mips_asm)


LEAF_MIPS_DSPR2(qt_convert_rgb888_to_rgb32_mips_dspr2_asm)
/*
 * Parameters:
 *   a0 - dst *a8r8g8b8
 *   a1 - src *r8g8b8
 *   a2 - len
 *
 *   R G B r g b R G B r g b R G B r g b . . .  -- input
 *   ------- ------- ------- ------- -------
 *   _ R G B _ r g b _ R G B _ r g b _ R G . .  -- output
 *
 * Register usage:
 *   a2 - tail (len % 4) == (len & 0x3)
 *   t0 - batches (len / 4) == (len >> 2)
 *   t1-t7, s0-s3, v0, v1 - temporary
 */
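/*
 * Rough C equivalent, for reference only (the function name is
 * illustrative, not part of this file):
 *
 *   static void convert_rgb888_to_rgb32_c(uint32_t *dst, const uint8_t *src,
 *                                         int len)
 *   {
 *       for (int i = 0; i < len; ++i, src += 3)
 *           dst[i] = 0xff000000u | ((uint32_t)src[0] << 16)
 *                                | ((uint32_t)src[1] <<  8)
 *                                |  (uint32_t)src[2];
 *   }
 */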

    srl     t0, a2, 2            /* batches = len / 4 */
    andi    a2, a2, 0x3          /* tail = len % 4 */

    beqz    t0, 5f               /* if !batches: tail */
    lui     t7, 0xff00           /* [FF 00 00 00] */
    SAVE_REGS_ON_STACK 8, s1, s2, s3, s0, v0, v1

1:  pref    4, 0 (a1)            /* hint: read-streamed */
    pref    5, 0 (a0)            /* hint: prepare-write */
    addiu   t0, t0, -1           /* batches-- */

    lbu     t1, 0 (a1)           /* [__ __ __ R1] */
    lbu     t2, 1 (a1)           /* [__ __ __ G1] */
    lbu     t3, 2 (a1)           /* [__ __ __ B1] */

    lbu     t4, 3 (a1)           /* [__ __ __ r2] */
    lbu     t5, 4 (a1)           /* [__ __ __ g2] */
    lbu     t6, 5 (a1)           /* [__ __ __ b2] */

    lbu     s1, 6 (a1)           /* [__ __ __ R3] */
    lbu     s2, 7 (a1)           /* [__ __ __ G3] */
    lbu     s3, 8 (a1)           /* [__ __ __ B3] */

    lbu     s0, 9 (a1)           /* [__ __ __ r4] */
    lbu     v0, 10 (a1)          /* [__ __ __ g4] */
    lbu     v1, 11 (a1)          /* [__ __ __ b4] */

    append  t1, t2, 8            /* [__ __ R1 G1] */
    append  t4, t5, 8            /* [__ __ r2 g2] */
    append  s1, s2, 8            /* [__ __ R3 G3] */
    append  s0, v0, 8            /* [__ __ r4 g4] */
    append  t1, t3, 8            /* [__ R1 G1 B1] */
    append  t4, t6, 8            /* [__ r2 g2 b2] */
    append  s1, s3, 8            /* [__ R3 G3 B3] */
    append  s0, v1, 8            /* [__ r4 g4 b4] */
    or      t1, t1, t7           /* [FF R1 G1 B1] */
    or      t4, t4, t7           /* [FF r2 g2 b2] */
    or      s1, s1, t7           /* [FF R3 G3 B3] */
    or      s0, s0, t7           /* [FF r4 g4 b4] */

    sw      t1, 0 (a0)
    sw      t4, 4 (a0)
    sw      s1, 8 (a0)
    sw      s0, 12 (a0)

    addiu   a1, a1, 12           /* src += 4*3 */
    bnez    t0, 1b               /* if batches: loop */
    addiu   a0, a0, 16           /* dst += 4*4 */

    RESTORE_REGS_FROM_STACK 8, s1, s2, s3, s0, v0, v1

    /* handle remaining "tail" (a2) items */
5:  beqz    a2, 0f
    lui     t0, 0xff00           /* [FF __ __ __] */

1:  lbu     t1, 0 (a1)           /* [__ __ __ RR] */
    lbu     t2, 1 (a1)           /* [__ __ __ GG] */
    lbu     t3, 2 (a1)           /* [__ __ __ BB] */
    sll     t1, t1, 16           /* [__ RR __ __] */
    sll     t2, t2, 8            /* [__ __ GG __] */
    or      t0, t0, t1           /* [FF RR __ __] */
    or      t2, t2, t3           /* [__ __ GG BB] */
    addi    a2, a2, -1           /* len-- */
    or      t0, t0, t2           /* [FF RR GG BB] */
    addiu   a1, a1, 3            /* src += 3 */
    sw      t0, 0 (a0)
    addiu   a0, a0, 4            /* dst++ */
    bnez    a2, 1b               /* if tail: loop */
    lui     t0, 0xff00           /* [FF __ __ __] */

0:  jr      ra
    nop

END(qt_convert_rgb888_to_rgb32_mips_dspr2_asm)