1 | /**************************************************************************** |
2 | ** |
3 | ** Copyright (C) 2013 Imagination Technologies Limited, www.imgtec.com |
4 | ** Contact: https://www.qt.io/licensing/ |
5 | ** |
6 | ** This file is part of the QtGui module of the Qt Toolkit. |
7 | ** |
8 | ** $QT_BEGIN_LICENSE:LGPL$ |
9 | ** Commercial License Usage |
10 | ** Licensees holding valid commercial Qt licenses may use this file in |
11 | ** accordance with the commercial license agreement provided with the |
12 | ** Software or, alternatively, in accordance with the terms contained in |
13 | ** a written agreement between you and The Qt Company. For licensing terms |
14 | ** and conditions see https://www.qt.io/terms-conditions. For further |
15 | ** information use the contact form at https://www.qt.io/contact-us. |
16 | ** |
17 | ** GNU Lesser General Public License Usage |
18 | ** Alternatively, this file may be used under the terms of the GNU Lesser |
19 | ** General Public License version 3 as published by the Free Software |
20 | ** Foundation and appearing in the file LICENSE.LGPL3 included in the |
21 | ** packaging of this file. Please review the following information to |
22 | ** ensure the GNU Lesser General Public License version 3 requirements |
23 | ** will be met: https://www.gnu.org/licenses/lgpl-3.0.html. |
24 | ** |
25 | ** GNU General Public License Usage |
26 | ** Alternatively, this file may be used under the terms of the GNU |
27 | ** General Public License version 2.0 or (at your option) the GNU General |
28 | ** Public license version 3 or any later version approved by the KDE Free |
29 | ** Qt Foundation. The licenses are as published by the Free Software |
30 | ** Foundation and appearing in the file LICENSE.GPL2 and LICENSE.GPL3 |
31 | ** included in the packaging of this file. Please review the following |
32 | ** information to ensure the GNU General Public License requirements will |
33 | ** be met: https://www.gnu.org/licenses/gpl-2.0.html and |
34 | ** https://www.gnu.org/licenses/gpl-3.0.html. |
35 | ** |
36 | ** $QT_END_LICENSE$ |
37 | ** |
38 | ****************************************************************************/ |
39 | |
40 | #include "../painting/qt_mips_asm_dsp_p.h" |
41 | |
/*
 * premultiply_argb_inplace_mips_asm
 *
 * Walks a buffer of 32-bit pixels line by line and rewrites every pixel in
 * place.  The top byte of each pixel (bits 31..24) is taken as alpha and is
 * stored back unchanged; each of the three lower bytes c is replaced by
 *
 *      (c * alpha + ((c * alpha) >> 8) + 0x80) >> 8
 *
 * Parameters:
 *   a0 - data:         pointer to the first pixel (read and written)
 *   a1 - numlines:     number of lines to process
 *   a2 - linelen:      number of pixels per line
 *   a3 - srclineskip:  value added to a0 after each line (byte count)
 *
 * Register usage:
 *   v1           - batches of 8 pixels left in the current line
 *   v0           - tail pixels left in the current line (linelen % 8)
 *   t0-t9, s0-s3 - temporaries; s0-s3 are saved/restored via the stack macros
 *
 * The instruction following every branch is its delay slot and is executed
 * whether or not the branch is taken; the loop bookkeeping depends on that.
 *
 * Edge cases: numlines == 0 returns immediately, and a line with
 * linelen == 0 touches no pixel (only srclineskip is applied).
 */
LEAF_MIPS_DSPR2(premultiply_argb_inplace_mips_asm)

    SAVE_REGS_ON_STACK 0, s0, s1, s2, s3

    beqz      a1, 0f                  /* numlines == 0: nothing to do */
     nop

3:  srl       v1, a2, 3               /* v1 = linelen / 8 */
    addiu     a1, a1, -1              /* numlines-- */
    beqz      v1, 5f                  /* no full batch: go to the tail check */
     andi     v0, a2, 0x7             /* v0 = linelen % 8 */
    pref      5, 0 (a0)               /* cache-hint: store-streamed */

    /* unrolled loop, handles (v1 = len / 8) batches of 8 pixels */
2:  addiu     v1, v1, -1
    pref      5, 0(a0)
    pref      5, 32(a0)

    /* pixels 0..3 of the batch */
    lw        t0, 0(a0)
    lw        t1, 4(a0)
    lw        t2, 8(a0)
    lw        t3, 12(a0)
    srl       t4, t0, 24              /* 00|00|00|A1 */
    replv.ph  t5, t4                  /* 00|A1|00|A1 */
    srl       t6, t1, 24              /* 00|00|00|A2 */
    replv.ph  t7, t6                  /* 00|A2|00|A2 */
    muleu_s.ph.qbl t8, t0, t5         /* A1*A1|A1*R1 */
    muleu_s.ph.qbr t0, t0, t5         /* A1*G1|A1*B1 */
    muleu_s.ph.qbl t9, t1, t7         /* A2*A2|A2*R2 */
    muleu_s.ph.qbr t1, t1, t7         /* A2*G2|A2*B2 */
    srl       t5, t2, 24              /* 00|00|00|A3 */
    replv.ph  s0, t5                  /* 00|A3|00|A3 */
    srl       t7, t3, 24              /* 00|00|00|A4 */
    replv.ph  s1, t7                  /* 00|A4|00|A4 */
    muleu_s.ph.qbl s2, t2, s0         /* A3*A3|A3*R3 */
    muleu_s.ph.qbr t2, t2, s0         /* A3*G3|A3*B3 */
    muleu_s.ph.qbl s0, t3, s1         /* A4*A4|A4*R4 */
    muleu_s.ph.qbr t3, t3, s1         /* A4*G4|A4*B4 */
    /* add (product >> 8) to each 16-bit product */
    preceu.ph.qbla s1, t8
    preceu.ph.qbla s3, t0
    addu.ph   t8, t8, s1
    addu.ph   t0, t0, s3
    preceu.ph.qbla s1, t9
    preceu.ph.qbla s3, t1
    addu.ph   t9, t9, s1
    addu.ph   t1, t1, s3
    preceu.ph.qbla s1, s2
    preceu.ph.qbla s3, t2
    addu.ph   s2, s2, s1
    addu.ph   t2, t2, s3
    preceu.ph.qbla s1, s0
    preceu.ph.qbla s3, t3
    addu.ph   s0, s0, s1
    addu.ph   t3, t3, s3
    /* rounding shift right by 8; only the low byte of each half is kept */
    shra_r.ph t8, t8, 8               /* xxAA1|xxRR1 */
    shra_r.ph t0, t0, 8               /* xxGG1|xxBB1 */
    shra_r.ph t9, t9, 8
    shra_r.ph t1, t1, 8
    shra_r.ph s2, s2, 8
    shra_r.ph t2, t2, 8
    shra_r.ph s0, s0, 8
    shra_r.ph t3, t3, 8
    /* pack the four low bytes back into one word per pixel */
    precr.qb.ph t0, t8, t0
    precr.qb.ph t1, t9, t1
    precr.qb.ph t2, s2, t2
    precr.qb.ph t3, s0, t3
    /* original alpha (t4/t6/t5/t7) on top of the low 24 bits of the result */
    append    t4, t0, 24
    append    t6, t1, 24
    append    t5, t2, 24
    append    t7, t3, 24
    sw        t4, 0(a0)
    sw        t6, 4(a0)
    sw        t5, 8(a0)
    sw        t7, 12(a0)

    /* pixels 4..7 of the batch: same sequence at offsets 16..28 */
    lw        t0, 16(a0)
    lw        t1, 20(a0)
    lw        t2, 24(a0)
    lw        t3, 28(a0)
    srl       t4, t0, 24              /* 00|00|00|A1 */
    replv.ph  t5, t4                  /* 00|A1|00|A1 */
    srl       t6, t1, 24              /* 00|00|00|A2 */
    replv.ph  t7, t6                  /* 00|A2|00|A2 */
    muleu_s.ph.qbl t8, t0, t5         /* A1*A1|A1*R1 */
    muleu_s.ph.qbr t0, t0, t5         /* A1*G1|A1*B1 */
    muleu_s.ph.qbl t9, t1, t7         /* A2*A2|A2*R2 */
    muleu_s.ph.qbr t1, t1, t7         /* A2*G2|A2*B2 */
    srl       t5, t2, 24              /* 00|00|00|A3 */
    replv.ph  s0, t5                  /* 00|A3|00|A3 */
    srl       t7, t3, 24              /* 00|00|00|A4 */
    replv.ph  s1, t7                  /* 00|A4|00|A4 */
    muleu_s.ph.qbl s2, t2, s0         /* A3*A3|A3*R3 */
    muleu_s.ph.qbr t2, t2, s0         /* A3*G3|A3*B3 */
    muleu_s.ph.qbl s0, t3, s1         /* A4*A4|A4*R4 */
    muleu_s.ph.qbr t3, t3, s1         /* A4*G4|A4*B4 */
    preceu.ph.qbla s1, t8
    preceu.ph.qbla s3, t0
    addu.ph   t8, t8, s1
    addu.ph   t0, t0, s3
    preceu.ph.qbla s1, t9
    preceu.ph.qbla s3, t1
    addu.ph   t9, t9, s1
    addu.ph   t1, t1, s3
    preceu.ph.qbla s1, s2
    preceu.ph.qbla s3, t2
    addu.ph   s2, s2, s1
    addu.ph   t2, t2, s3
    preceu.ph.qbla s1, s0
    preceu.ph.qbla s3, t3
    addu.ph   s0, s0, s1
    addu.ph   t3, t3, s3
    shra_r.ph t8, t8, 8               /* xxAA1|xxRR1 */
    shra_r.ph t0, t0, 8               /* xxGG1|xxBB1 */
    shra_r.ph t9, t9, 8
    shra_r.ph t1, t1, 8
    shra_r.ph s2, s2, 8
    shra_r.ph t2, t2, 8
    shra_r.ph s0, s0, 8
    shra_r.ph t3, t3, 8
    precr.qb.ph t0, t8, t0
    precr.qb.ph t1, t9, t1
    precr.qb.ph t2, s2, t2
    precr.qb.ph t3, s0, t3
    append    t4, t0, 24
    append    t6, t1, 24
    append    t5, t2, 24
    append    t7, t3, 24
    sw        t4, 16(a0)
    sw        t6, 20(a0)
    sw        t5, 24(a0)
    sw        t7, 28(a0)
    bgtz      v1, 2b                  /* if (v1): unrolled loop */
     addiu    a0, a0, 32              /* data += 8 pixels */

    /*
     * Tail check.  Reached by falling out of the unrolled loop and also
     * directly from the top of the line when linelen < 8, so an empty tail
     * (including linelen == 0) never enters the per-pixel loop.
     */
5:  beqz      v0, 4f                  /* if (!v0): skip tail loop */
     nop

    /* tail loop, handles (len < 8), one pixel at a time */
1:  lw        t1, 0 (a0)
    addiu     v0, v0, -1              /* len-- */
    srl       t2, t1, 24              /* t2 = alpha */
    replv.ph  t3, t2
    muleu_s.ph.qbl t4, t1, t3
    muleu_s.ph.qbr t1, t1, t3
    preceu.ph.qbla t3, t4
    preceu.ph.qbla t5, t1
    addu.ph   t4, t4, t3
    addu.ph   t1, t1, t5
    shra_r.ph t4, t4, 8
    shra_r.ph t1, t1, 8
    precr.qb.ph t1, t4, t1
    append    t2, t1, 24
    sw        t2, 0(a0)
    bgtz      v0, 1b
     addiu    a0, a0, 4               /* src++ */

4:  bnez      a1, 3b                  /* if (numlines): loop */
     addu     a0, a0, a3              /* src += srclineskip */

0:  /* return */
    RESTORE_REGS_FROM_STACK 0, s0, s1, s2, s3

    jr        ra
     nop

END(premultiply_argb_inplace_mips_asm)
205 | |
206 | |
LEAF_MIPS_DSPR2(qt_convert_rgb888_to_rgb32_mips_dspr2_asm)
/*
 * Expands packed 3-byte pixels into 32-bit words with the top byte forced
 * to 0xFF.  Source bytes are read in the order R, G, B and each output
 * word is stored as (0xFF << 24) | (R << 16) | (G << 8) | B.
 *
 * Parameters:
 *  a0 - dst *a8r8g8b8
 *  a1 - src *r8g8b8
 *  a2 - len (number of pixels)
 *
 *  R G B r g b R G B r g b R G B r g b . . .  -- input
 *  -------  -------  -------  -------  -------
 *  _ R G B  _ r g b  _ R G B  _ r g b  _ R G . .  -- output
 *
 * Register usage:
 *  a2 - tail (len % 4) == (len & 0x3)
 *  t0 - batches (len / 4) == (len >> 2); reused as the pixel accumulator
 *       in the tail loop
 *  t7 - 0xff000000 mask OR-ed into every pixel of the batch loop
 *  t1-t6, s0-s3, v0, v1 - temporary (s0-s3, v0, v1 are saved on the stack
 *       around the batch loop)
 *
 * The instruction following every branch is its delay slot and is executed
 * whether or not the branch is taken.
 */

    srl       t0, a2, 2               /* batches = len / 4 */
    andi      a2, a2, 0x3             /* tail = len % 4 */

    beqz      t0, 5f                  /* if !batches: tail */
     lui      t7, 0xff00              /* [FF 00 00 00] */
    SAVE_REGS_ON_STACK 8, s1, s2, s3, s0, v0, v1

    /* batch loop: 12 source bytes -> 4 destination words per iteration */
1:  pref      4, 0 (a1)               /* hint: read-streamed */
    pref      5, 0 (a0)               /* hint: store-streamed */
    addiu     t0, t0, -1              /* batches-- */

    lbu       t1, 0 (a1)              /* [__ __ __ R1] */
    lbu       t2, 1 (a1)              /* [__ __ __ G1] */
    lbu       t3, 2 (a1)              /* [__ __ __ B1] */

    lbu       t4, 3 (a1)              /* [__ __ __ r2] */
    lbu       t5, 4 (a1)              /* [__ __ __ g2] */
    lbu       t6, 5 (a1)              /* [__ __ __ b2] */

    lbu       s1, 6 (a1)              /* [__ __ __ R3] */
    lbu       s2, 7 (a1)              /* [__ __ __ G3] */
    lbu       s3, 8 (a1)              /* [__ __ __ B3] */

    lbu       s0, 9 (a1)              /* [__ __ __ r4] */
    lbu       v0, 10 (a1)             /* [__ __ __ g4] */
    lbu       v1, 11 (a1)             /* [__ __ __ b4] */

    /* append shifts the destination left by 8 and inserts the new byte */
    append    t1, t2, 8               /* [__ __ R1 G1] */
    append    t4, t5, 8               /* [__ __ r2 g2] */
    append    s1, s2, 8               /* [__ __ R3 G3] */
    append    s0, v0, 8               /* [__ __ r4 g4] */
    append    t1, t3, 8               /* [__ R1 G1 B1] */
    append    t4, t6, 8               /* [__ r2 g2 b2] */
    append    s1, s3, 8               /* [__ R3 G3 B3] */
    append    s0, v1, 8               /* [__ r4 g4 b4] */
    or        t1, t1, t7              /* [FF R1 G1 B1] */
    or        t4, t4, t7              /* [FF r2 g2 b2] */
    or        s1, s1, t7              /* [FF R3 G3 B3] */
    or        s0, s0, t7              /* [FF r4 g4 b4] */

    sw        t1, 0 (a0)
    sw        t4, 4 (a0)
    sw        s1, 8 (a0)
    sw        s0, 12 (a0)

    addiu     a1, a1, 12              /* src += 4*3 bytes */
    bnez      t0, 1b                  /* if batches: loop */
     addiu    a0, a0, 16              /* dst += 4 pixels */

    RESTORE_REGS_FROM_STACK 8, s1, s2, s3, s0, v0, v1

    /* handle remaining "tail" (a2) items */
5:  beqz      a2, 0f
     lui      t0, 0xff00              /* [FF __ __ __] */

    /* tail loop: t0 enters every iteration holding 0xff000000 */
1:  lbu       t1, 0 (a1)              /* [__ __ __ RR] */
    lbu       t2, 1 (a1)              /* [__ __ __ GG] */
    lbu       t3, 2 (a1)              /* [__ __ __ BB] */
    sll       t1, t1, 16              /* [__ RR __ __] */
    sll       t2, t2, 8               /* [__ __ GG __] */
    or        t0, t0, t1              /* [FF RR __ __] */
    or        t2, t2, t3              /* [__ __ GG BB] */
    addiu     a2, a2, -1              /* len-- (addiu: no overflow trap) */
    or        t0, t0, t2              /* [FF RR GG BB] */
    addiu     a1, a1, 3               /* src += 3 */
    sw        t0, 0 (a0)
    addiu     a0, a0, 4               /* dst++ */
    bnez      a2, 1b                  /* if tail: loop */
     lui      t0, 0xff00              /* [FF __ __ __] for the next pixel */

0:  jr        ra
     nop

END(qt_convert_rgb888_to_rgb32_mips_dspr2_asm)
298 | |