1 | /**************************************************************************** |
2 | ** |
3 | ** Copyright (C) 2018 The Qt Company Ltd. |
4 | ** Copyright (C) 2018 Intel Corporation. |
5 | ** Contact: https://www.qt.io/licensing/ |
6 | ** |
7 | ** This file is part of the QtGui module of the Qt Toolkit. |
8 | ** |
9 | ** $QT_BEGIN_LICENSE:LGPL$ |
10 | ** Commercial License Usage |
11 | ** Licensees holding valid commercial Qt licenses may use this file in |
12 | ** accordance with the commercial license agreement provided with the |
13 | ** Software or, alternatively, in accordance with the terms contained in |
14 | ** a written agreement between you and The Qt Company. For licensing terms |
15 | ** and conditions see https://www.qt.io/terms-conditions. For further |
16 | ** information use the contact form at https://www.qt.io/contact-us. |
17 | ** |
18 | ** GNU Lesser General Public License Usage |
19 | ** Alternatively, this file may be used under the terms of the GNU Lesser |
20 | ** General Public License version 3 as published by the Free Software |
21 | ** Foundation and appearing in the file LICENSE.LGPL3 included in the |
22 | ** packaging of this file. Please review the following information to |
23 | ** ensure the GNU Lesser General Public License version 3 requirements |
24 | ** will be met: https://www.gnu.org/licenses/lgpl-3.0.html. |
25 | ** |
26 | ** GNU General Public License Usage |
27 | ** Alternatively, this file may be used under the terms of the GNU |
28 | ** General Public License version 2.0 or (at your option) the GNU General |
29 | ** Public license version 3 or any later version approved by the KDE Free |
30 | ** Qt Foundation. The licenses are as published by the Free Software |
31 | ** Foundation and appearing in the file LICENSE.GPL2 and LICENSE.GPL3 |
32 | ** included in the packaging of this file. Please review the following |
33 | ** information to ensure the GNU General Public License requirements will |
34 | ** be met: https://www.gnu.org/licenses/gpl-2.0.html and |
35 | ** https://www.gnu.org/licenses/gpl-3.0.html. |
36 | ** |
37 | ** $QT_END_LICENSE$ |
38 | ** |
39 | ****************************************************************************/ |
40 | |
41 | #include <qglobal.h> |
42 | |
43 | #include <qstylehints.h> |
44 | #include <qguiapplication.h> |
45 | #include <qatomic.h> |
46 | #include <private/qcolortrclut_p.h> |
47 | #include <private/qdrawhelper_p.h> |
48 | #include <private/qpaintengine_raster_p.h> |
49 | #include <private/qpainter_p.h> |
50 | #include <private/qdrawhelper_x86_p.h> |
51 | #include <private/qdrawingprimitive_sse2_p.h> |
52 | #include <private/qdrawhelper_neon_p.h> |
53 | #if defined(QT_COMPILER_SUPPORTS_MIPS_DSP) || defined(QT_COMPILER_SUPPORTS_MIPS_DSPR2) |
54 | #include <private/qdrawhelper_mips_dsp_p.h> |
55 | #endif |
56 | #include <private/qguiapplication_p.h> |
57 | #include <private/qrgba64_p.h> |
58 | #include <qendian.h> |
59 | #include <qloggingcategory.h> |
60 | #include <qmath.h> |
61 | |
62 | QT_BEGIN_NAMESPACE |
63 | |
64 | Q_LOGGING_CATEGORY(lcQtGuiDrawHelper, "qt.gui.drawhelper" ) |
65 | |
66 | #define MASK(src, a) src = BYTE_MUL(src, a) |
67 | |
68 | /* |
69 | constants and structures |
70 | */ |
71 | |
72 | enum { |
73 | fixed_scale = 1 << 16, |
74 | half_point = 1 << 15 |
75 | }; |
76 | |
77 | template<QImage::Format> Q_DECL_CONSTEXPR uint redWidth(); |
78 | template<QImage::Format> Q_DECL_CONSTEXPR uint redShift(); |
79 | template<QImage::Format> Q_DECL_CONSTEXPR uint greenWidth(); |
80 | template<QImage::Format> Q_DECL_CONSTEXPR uint greenShift(); |
81 | template<QImage::Format> Q_DECL_CONSTEXPR uint blueWidth(); |
82 | template<QImage::Format> Q_DECL_CONSTEXPR uint blueShift(); |
83 | template<QImage::Format> Q_DECL_CONSTEXPR uint alphaWidth(); |
84 | template<QImage::Format> Q_DECL_CONSTEXPR uint alphaShift(); |
85 | |
86 | template<> Q_DECL_CONSTEXPR uint redWidth<QImage::Format_RGB16>() { return 5; } |
87 | template<> Q_DECL_CONSTEXPR uint redWidth<QImage::Format_RGB444>() { return 4; } |
88 | template<> Q_DECL_CONSTEXPR uint redWidth<QImage::Format_RGB555>() { return 5; } |
89 | template<> Q_DECL_CONSTEXPR uint redWidth<QImage::Format_RGB666>() { return 6; } |
90 | template<> Q_DECL_CONSTEXPR uint redWidth<QImage::Format_RGB888>() { return 8; } |
91 | template<> Q_DECL_CONSTEXPR uint redWidth<QImage::Format_ARGB4444_Premultiplied>() { return 4; } |
92 | template<> Q_DECL_CONSTEXPR uint redWidth<QImage::Format_ARGB8555_Premultiplied>() { return 5; } |
93 | template<> Q_DECL_CONSTEXPR uint redWidth<QImage::Format_ARGB8565_Premultiplied>() { return 5; } |
94 | template<> Q_DECL_CONSTEXPR uint redWidth<QImage::Format_ARGB6666_Premultiplied>() { return 6; } |
95 | template<> Q_DECL_CONSTEXPR uint redWidth<QImage::Format_RGBX8888>() { return 8; } |
96 | template<> Q_DECL_CONSTEXPR uint redWidth<QImage::Format_RGBA8888>() { return 8; } |
97 | template<> Q_DECL_CONSTEXPR uint redWidth<QImage::Format_RGBA8888_Premultiplied>() { return 8; } |
98 | |
99 | template<> Q_DECL_CONSTEXPR uint redShift<QImage::Format_RGB16>() { return 11; } |
100 | template<> Q_DECL_CONSTEXPR uint redShift<QImage::Format_RGB444>() { return 8; } |
101 | template<> Q_DECL_CONSTEXPR uint redShift<QImage::Format_RGB555>() { return 10; } |
102 | template<> Q_DECL_CONSTEXPR uint redShift<QImage::Format_RGB666>() { return 12; } |
103 | template<> Q_DECL_CONSTEXPR uint redShift<QImage::Format_RGB888>() { return 16; } |
104 | template<> Q_DECL_CONSTEXPR uint redShift<QImage::Format_ARGB4444_Premultiplied>() { return 8; } |
105 | template<> Q_DECL_CONSTEXPR uint redShift<QImage::Format_ARGB8555_Premultiplied>() { return 18; } |
106 | template<> Q_DECL_CONSTEXPR uint redShift<QImage::Format_ARGB8565_Premultiplied>() { return 19; } |
107 | template<> Q_DECL_CONSTEXPR uint redShift<QImage::Format_ARGB6666_Premultiplied>() { return 12; } |
108 | #if Q_BYTE_ORDER == Q_BIG_ENDIAN |
109 | template<> Q_DECL_CONSTEXPR uint redShift<QImage::Format_RGBX8888>() { return 24; } |
110 | template<> Q_DECL_CONSTEXPR uint redShift<QImage::Format_RGBA8888>() { return 24; } |
111 | template<> Q_DECL_CONSTEXPR uint redShift<QImage::Format_RGBA8888_Premultiplied>() { return 24; } |
112 | #else |
113 | template<> Q_DECL_CONSTEXPR uint redShift<QImage::Format_RGBX8888>() { return 0; } |
114 | template<> Q_DECL_CONSTEXPR uint redShift<QImage::Format_RGBA8888>() { return 0; } |
115 | template<> Q_DECL_CONSTEXPR uint redShift<QImage::Format_RGBA8888_Premultiplied>() { return 0; } |
116 | #endif |
117 | template<> Q_DECL_CONSTEXPR uint greenWidth<QImage::Format_RGB16>() { return 6; } |
118 | template<> Q_DECL_CONSTEXPR uint greenWidth<QImage::Format_RGB444>() { return 4; } |
119 | template<> Q_DECL_CONSTEXPR uint greenWidth<QImage::Format_RGB555>() { return 5; } |
120 | template<> Q_DECL_CONSTEXPR uint greenWidth<QImage::Format_RGB666>() { return 6; } |
121 | template<> Q_DECL_CONSTEXPR uint greenWidth<QImage::Format_RGB888>() { return 8; } |
122 | template<> Q_DECL_CONSTEXPR uint greenWidth<QImage::Format_ARGB4444_Premultiplied>() { return 4; } |
123 | template<> Q_DECL_CONSTEXPR uint greenWidth<QImage::Format_ARGB8555_Premultiplied>() { return 5; } |
124 | template<> Q_DECL_CONSTEXPR uint greenWidth<QImage::Format_ARGB8565_Premultiplied>() { return 6; } |
125 | template<> Q_DECL_CONSTEXPR uint greenWidth<QImage::Format_ARGB6666_Premultiplied>() { return 6; } |
126 | template<> Q_DECL_CONSTEXPR uint greenWidth<QImage::Format_RGBX8888>() { return 8; } |
127 | template<> Q_DECL_CONSTEXPR uint greenWidth<QImage::Format_RGBA8888>() { return 8; } |
128 | template<> Q_DECL_CONSTEXPR uint greenWidth<QImage::Format_RGBA8888_Premultiplied>() { return 8; } |
129 | |
130 | template<> Q_DECL_CONSTEXPR uint greenShift<QImage::Format_RGB16>() { return 5; } |
131 | template<> Q_DECL_CONSTEXPR uint greenShift<QImage::Format_RGB444>() { return 4; } |
132 | template<> Q_DECL_CONSTEXPR uint greenShift<QImage::Format_RGB555>() { return 5; } |
133 | template<> Q_DECL_CONSTEXPR uint greenShift<QImage::Format_RGB666>() { return 6; } |
134 | template<> Q_DECL_CONSTEXPR uint greenShift<QImage::Format_RGB888>() { return 8; } |
135 | template<> Q_DECL_CONSTEXPR uint greenShift<QImage::Format_ARGB4444_Premultiplied>() { return 4; } |
136 | template<> Q_DECL_CONSTEXPR uint greenShift<QImage::Format_ARGB8555_Premultiplied>() { return 13; } |
137 | template<> Q_DECL_CONSTEXPR uint greenShift<QImage::Format_ARGB8565_Premultiplied>() { return 13; } |
138 | template<> Q_DECL_CONSTEXPR uint greenShift<QImage::Format_ARGB6666_Premultiplied>() { return 6; } |
139 | #if Q_BYTE_ORDER == Q_BIG_ENDIAN |
140 | template<> Q_DECL_CONSTEXPR uint greenShift<QImage::Format_RGBX8888>() { return 16; } |
141 | template<> Q_DECL_CONSTEXPR uint greenShift<QImage::Format_RGBA8888>() { return 16; } |
142 | template<> Q_DECL_CONSTEXPR uint greenShift<QImage::Format_RGBA8888_Premultiplied>() { return 16; } |
143 | #else |
144 | template<> Q_DECL_CONSTEXPR uint greenShift<QImage::Format_RGBX8888>() { return 8; } |
145 | template<> Q_DECL_CONSTEXPR uint greenShift<QImage::Format_RGBA8888>() { return 8; } |
146 | template<> Q_DECL_CONSTEXPR uint greenShift<QImage::Format_RGBA8888_Premultiplied>() { return 8; } |
147 | #endif |
148 | template<> Q_DECL_CONSTEXPR uint blueWidth<QImage::Format_RGB16>() { return 5; } |
149 | template<> Q_DECL_CONSTEXPR uint blueWidth<QImage::Format_RGB444>() { return 4; } |
150 | template<> Q_DECL_CONSTEXPR uint blueWidth<QImage::Format_RGB555>() { return 5; } |
151 | template<> Q_DECL_CONSTEXPR uint blueWidth<QImage::Format_RGB666>() { return 6; } |
152 | template<> Q_DECL_CONSTEXPR uint blueWidth<QImage::Format_RGB888>() { return 8; } |
153 | template<> Q_DECL_CONSTEXPR uint blueWidth<QImage::Format_ARGB4444_Premultiplied>() { return 4; } |
154 | template<> Q_DECL_CONSTEXPR uint blueWidth<QImage::Format_ARGB8555_Premultiplied>() { return 5; } |
155 | template<> Q_DECL_CONSTEXPR uint blueWidth<QImage::Format_ARGB8565_Premultiplied>() { return 5; } |
156 | template<> Q_DECL_CONSTEXPR uint blueWidth<QImage::Format_ARGB6666_Premultiplied>() { return 6; } |
157 | template<> Q_DECL_CONSTEXPR uint blueWidth<QImage::Format_RGBX8888>() { return 8; } |
158 | template<> Q_DECL_CONSTEXPR uint blueWidth<QImage::Format_RGBA8888>() { return 8; } |
159 | template<> Q_DECL_CONSTEXPR uint blueWidth<QImage::Format_RGBA8888_Premultiplied>() { return 8; } |
160 | |
161 | template<> Q_DECL_CONSTEXPR uint blueShift<QImage::Format_RGB16>() { return 0; } |
162 | template<> Q_DECL_CONSTEXPR uint blueShift<QImage::Format_RGB444>() { return 0; } |
163 | template<> Q_DECL_CONSTEXPR uint blueShift<QImage::Format_RGB555>() { return 0; } |
164 | template<> Q_DECL_CONSTEXPR uint blueShift<QImage::Format_RGB666>() { return 0; } |
165 | template<> Q_DECL_CONSTEXPR uint blueShift<QImage::Format_RGB888>() { return 0; } |
166 | template<> Q_DECL_CONSTEXPR uint blueShift<QImage::Format_ARGB4444_Premultiplied>() { return 0; } |
167 | template<> Q_DECL_CONSTEXPR uint blueShift<QImage::Format_ARGB8555_Premultiplied>() { return 8; } |
168 | template<> Q_DECL_CONSTEXPR uint blueShift<QImage::Format_ARGB8565_Premultiplied>() { return 8; } |
169 | template<> Q_DECL_CONSTEXPR uint blueShift<QImage::Format_ARGB6666_Premultiplied>() { return 0; } |
170 | #if Q_BYTE_ORDER == Q_BIG_ENDIAN |
171 | template<> Q_DECL_CONSTEXPR uint blueShift<QImage::Format_RGBX8888>() { return 8; } |
172 | template<> Q_DECL_CONSTEXPR uint blueShift<QImage::Format_RGBA8888>() { return 8; } |
173 | template<> Q_DECL_CONSTEXPR uint blueShift<QImage::Format_RGBA8888_Premultiplied>() { return 8; } |
174 | #else |
175 | template<> Q_DECL_CONSTEXPR uint blueShift<QImage::Format_RGBX8888>() { return 16; } |
176 | template<> Q_DECL_CONSTEXPR uint blueShift<QImage::Format_RGBA8888>() { return 16; } |
177 | template<> Q_DECL_CONSTEXPR uint blueShift<QImage::Format_RGBA8888_Premultiplied>() { return 16; } |
178 | #endif |
179 | template<> Q_DECL_CONSTEXPR uint alphaWidth<QImage::Format_RGB16>() { return 0; } |
180 | template<> Q_DECL_CONSTEXPR uint alphaWidth<QImage::Format_RGB444>() { return 0; } |
181 | template<> Q_DECL_CONSTEXPR uint alphaWidth<QImage::Format_RGB555>() { return 0; } |
182 | template<> Q_DECL_CONSTEXPR uint alphaWidth<QImage::Format_RGB666>() { return 0; } |
183 | template<> Q_DECL_CONSTEXPR uint alphaWidth<QImage::Format_RGB888>() { return 0; } |
184 | template<> Q_DECL_CONSTEXPR uint alphaWidth<QImage::Format_ARGB4444_Premultiplied>() { return 4; } |
185 | template<> Q_DECL_CONSTEXPR uint alphaWidth<QImage::Format_ARGB8555_Premultiplied>() { return 8; } |
186 | template<> Q_DECL_CONSTEXPR uint alphaWidth<QImage::Format_ARGB8565_Premultiplied>() { return 8; } |
187 | template<> Q_DECL_CONSTEXPR uint alphaWidth<QImage::Format_ARGB6666_Premultiplied>() { return 6; } |
188 | template<> Q_DECL_CONSTEXPR uint alphaWidth<QImage::Format_RGBX8888>() { return 0; } |
189 | template<> Q_DECL_CONSTEXPR uint alphaWidth<QImage::Format_RGBA8888>() { return 8; } |
190 | template<> Q_DECL_CONSTEXPR uint alphaWidth<QImage::Format_RGBA8888_Premultiplied>() { return 8; } |
191 | |
192 | template<> Q_DECL_CONSTEXPR uint alphaShift<QImage::Format_RGB16>() { return 0; } |
193 | template<> Q_DECL_CONSTEXPR uint alphaShift<QImage::Format_RGB444>() { return 0; } |
194 | template<> Q_DECL_CONSTEXPR uint alphaShift<QImage::Format_RGB555>() { return 0; } |
195 | template<> Q_DECL_CONSTEXPR uint alphaShift<QImage::Format_RGB666>() { return 0; } |
196 | template<> Q_DECL_CONSTEXPR uint alphaShift<QImage::Format_RGB888>() { return 0; } |
197 | template<> Q_DECL_CONSTEXPR uint alphaShift<QImage::Format_ARGB4444_Premultiplied>() { return 12; } |
198 | template<> Q_DECL_CONSTEXPR uint alphaShift<QImage::Format_ARGB8555_Premultiplied>() { return 0; } |
199 | template<> Q_DECL_CONSTEXPR uint alphaShift<QImage::Format_ARGB8565_Premultiplied>() { return 0; } |
200 | template<> Q_DECL_CONSTEXPR uint alphaShift<QImage::Format_ARGB6666_Premultiplied>() { return 18; } |
201 | #if Q_BYTE_ORDER == Q_BIG_ENDIAN |
202 | template<> Q_DECL_CONSTEXPR uint alphaShift<QImage::Format_RGBX8888>() { return 0; } |
203 | template<> Q_DECL_CONSTEXPR uint alphaShift<QImage::Format_RGBA8888>() { return 0; } |
204 | template<> Q_DECL_CONSTEXPR uint alphaShift<QImage::Format_RGBA8888_Premultiplied>() { return 0; } |
205 | #else |
206 | template<> Q_DECL_CONSTEXPR uint alphaShift<QImage::Format_RGBX8888>() { return 24; } |
207 | template<> Q_DECL_CONSTEXPR uint alphaShift<QImage::Format_RGBA8888>() { return 24; } |
208 | template<> Q_DECL_CONSTEXPR uint alphaShift<QImage::Format_RGBA8888_Premultiplied>() { return 24; } |
209 | #endif |
210 | |
211 | template<QImage::Format> constexpr QPixelLayout::BPP bitsPerPixel(); |
212 | template<> constexpr QPixelLayout::BPP bitsPerPixel<QImage::Format_RGB16>() { return QPixelLayout::BPP16; } |
213 | template<> constexpr QPixelLayout::BPP bitsPerPixel<QImage::Format_RGB444>() { return QPixelLayout::BPP16; } |
214 | template<> constexpr QPixelLayout::BPP bitsPerPixel<QImage::Format_RGB555>() { return QPixelLayout::BPP16; } |
215 | template<> constexpr QPixelLayout::BPP bitsPerPixel<QImage::Format_RGB666>() { return QPixelLayout::BPP24; } |
216 | template<> constexpr QPixelLayout::BPP bitsPerPixel<QImage::Format_RGB888>() { return QPixelLayout::BPP24; } |
217 | template<> constexpr QPixelLayout::BPP bitsPerPixel<QImage::Format_ARGB4444_Premultiplied>() { return QPixelLayout::BPP16; } |
218 | template<> constexpr QPixelLayout::BPP bitsPerPixel<QImage::Format_ARGB8555_Premultiplied>() { return QPixelLayout::BPP24; } |
219 | template<> constexpr QPixelLayout::BPP bitsPerPixel<QImage::Format_ARGB8565_Premultiplied>() { return QPixelLayout::BPP24; } |
220 | template<> constexpr QPixelLayout::BPP bitsPerPixel<QImage::Format_ARGB6666_Premultiplied>() { return QPixelLayout::BPP24; } |
221 | template<> constexpr QPixelLayout::BPP bitsPerPixel<QImage::Format_RGBX8888>() { return QPixelLayout::BPP32; } |
222 | template<> constexpr QPixelLayout::BPP bitsPerPixel<QImage::Format_RGBA8888>() { return QPixelLayout::BPP32; } |
223 | template<> constexpr QPixelLayout::BPP bitsPerPixel<QImage::Format_RGBA8888_Premultiplied>() { return QPixelLayout::BPP32; } |
224 | |
225 | |
226 | typedef const uint *(QT_FASTCALL *FetchPixelsFunc)(uint *buffer, const uchar *src, int index, int count); |
227 | |
228 | template <QPixelLayout::BPP bpp> static |
229 | uint QT_FASTCALL fetchPixel(const uchar *, int) |
230 | { |
231 | Q_UNREACHABLE(); |
232 | return 0; |
233 | } |
234 | |
235 | template <> |
236 | inline uint QT_FASTCALL fetchPixel<QPixelLayout::BPP1LSB>(const uchar *src, int index) |
237 | { |
238 | return (src[index >> 3] >> (index & 7)) & 1; |
239 | } |
240 | |
241 | template <> |
242 | inline uint QT_FASTCALL fetchPixel<QPixelLayout::BPP1MSB>(const uchar *src, int index) |
243 | { |
244 | return (src[index >> 3] >> (~index & 7)) & 1; |
245 | } |
246 | |
247 | template <> |
248 | inline uint QT_FASTCALL fetchPixel<QPixelLayout::BPP8>(const uchar *src, int index) |
249 | { |
250 | return src[index]; |
251 | } |
252 | |
253 | template <> |
254 | inline uint QT_FASTCALL fetchPixel<QPixelLayout::BPP16>(const uchar *src, int index) |
255 | { |
256 | return reinterpret_cast<const quint16 *>(src)[index]; |
257 | } |
258 | |
259 | template <> |
260 | inline uint QT_FASTCALL fetchPixel<QPixelLayout::BPP24>(const uchar *src, int index) |
261 | { |
262 | return reinterpret_cast<const quint24 *>(src)[index]; |
263 | } |
264 | |
265 | template <> |
266 | inline uint QT_FASTCALL fetchPixel<QPixelLayout::BPP32>(const uchar *src, int index) |
267 | { |
268 | return reinterpret_cast<const uint *>(src)[index]; |
269 | } |
270 | |
271 | template <> |
272 | inline uint QT_FASTCALL fetchPixel<QPixelLayout::BPP64>(const uchar *src, int index) |
273 | { |
274 | // We have to do the conversion in fetch to fit into a 32bit uint |
275 | QRgba64 c = reinterpret_cast<const QRgba64 *>(src)[index]; |
276 | return c.toArgb32(); |
277 | } |
278 | |
279 | template <QPixelLayout::BPP bpp> |
280 | static quint64 QT_FASTCALL fetchPixel64(const uchar *src, int index) |
281 | { |
282 | Q_STATIC_ASSERT(bpp != QPixelLayout::BPP64); |
283 | return fetchPixel<bpp>(src, index); |
284 | } |
285 | |
286 | template <QPixelLayout::BPP width> static |
287 | void QT_FASTCALL storePixel(uchar *dest, int index, uint pixel); |
288 | |
289 | template <> |
290 | inline void QT_FASTCALL storePixel<QPixelLayout::BPP16>(uchar *dest, int index, uint pixel) |
291 | { |
292 | reinterpret_cast<quint16 *>(dest)[index] = quint16(pixel); |
293 | } |
294 | |
295 | template <> |
296 | inline void QT_FASTCALL storePixel<QPixelLayout::BPP24>(uchar *dest, int index, uint pixel) |
297 | { |
298 | reinterpret_cast<quint24 *>(dest)[index] = quint24(pixel); |
299 | } |
300 | |
301 | typedef uint (QT_FASTCALL *FetchPixelFunc)(const uchar *src, int index); |
302 | |
303 | static const FetchPixelFunc qFetchPixel[QPixelLayout::BPPCount] = { |
304 | 0, // BPPNone |
305 | fetchPixel<QPixelLayout::BPP1MSB>, // BPP1MSB |
306 | fetchPixel<QPixelLayout::BPP1LSB>, // BPP1LSB |
307 | fetchPixel<QPixelLayout::BPP8>, // BPP8 |
308 | fetchPixel<QPixelLayout::BPP16>, // BPP16 |
309 | fetchPixel<QPixelLayout::BPP24>, // BPP24 |
310 | fetchPixel<QPixelLayout::BPP32>, // BPP32 |
311 | fetchPixel<QPixelLayout::BPP64> // BPP64 |
312 | }; |
313 | |
314 | template<QImage::Format Format> |
315 | static Q_ALWAYS_INLINE uint convertPixelToRGB32(uint s) |
316 | { |
317 | Q_CONSTEXPR uint redMask = ((1 << redWidth<Format>()) - 1); |
318 | Q_CONSTEXPR uint greenMask = ((1 << greenWidth<Format>()) - 1); |
319 | Q_CONSTEXPR uint blueMask = ((1 << blueWidth<Format>()) - 1); |
320 | |
321 | Q_CONSTEXPR uchar redLeftShift = 8 - redWidth<Format>(); |
322 | Q_CONSTEXPR uchar greenLeftShift = 8 - greenWidth<Format>(); |
323 | Q_CONSTEXPR uchar blueLeftShift = 8 - blueWidth<Format>(); |
324 | |
325 | Q_CONSTEXPR uchar redRightShift = 2 * redWidth<Format>() - 8; |
326 | Q_CONSTEXPR uchar greenRightShift = 2 * greenWidth<Format>() - 8; |
327 | Q_CONSTEXPR uchar blueRightShift = 2 * blueWidth<Format>() - 8; |
328 | |
329 | uint red = (s >> redShift<Format>()) & redMask; |
330 | uint green = (s >> greenShift<Format>()) & greenMask; |
331 | uint blue = (s >> blueShift<Format>()) & blueMask; |
332 | |
333 | red = ((red << redLeftShift) | (red >> redRightShift)) << 16; |
334 | green = ((green << greenLeftShift) | (green >> greenRightShift)) << 8; |
335 | blue = (blue << blueLeftShift) | (blue >> blueRightShift); |
336 | return 0xff000000 | red | green | blue; |
337 | } |
338 | |
339 | template<QImage::Format Format> |
340 | static void QT_FASTCALL convertToRGB32(uint *buffer, int count, const QVector<QRgb> *) |
341 | { |
342 | for (int i = 0; i < count; ++i) |
343 | buffer[i] = convertPixelToRGB32<Format>(buffer[i]); |
344 | } |
345 | |
#if defined(__SSE2__) && !defined(__SSSE3__) && QT_COMPILER_SUPPORTS_SSSE3
// SSSE3 bulk fetch for 24-bit pixels. Only declared here; it is needed when
// this file is built for SSE2 without SSSE3 and the feature is detected at
// run time.
extern const uint *QT_FASTCALL fetchPixelsBPP24_ssse3(uint *dest, const uchar *src,
                                                      int index, int count);
#endif
349 | |
350 | template<QImage::Format Format> |
351 | static const uint *QT_FASTCALL fetchRGBToRGB32(uint *buffer, const uchar *src, int index, int count, |
352 | const QVector<QRgb> *, QDitherInfo *) |
353 | { |
354 | constexpr QPixelLayout::BPP BPP = bitsPerPixel<Format>(); |
355 | #if defined(__SSE2__) && !defined(__SSSE3__) && QT_COMPILER_SUPPORTS_SSSE3 |
356 | if (BPP == QPixelLayout::BPP24 && qCpuHasFeature(SSSE3)) { |
357 | // With SSE2 can convertToRGB32 be vectorized, but it takes SSSE3 |
358 | // to vectorize the deforested version below. |
359 | fetchPixelsBPP24_ssse3(buffer, src, index, count); |
360 | convertToRGB32<Format>(buffer, count, nullptr); |
361 | return buffer; |
362 | } |
363 | #endif |
364 | for (int i = 0; i < count; ++i) |
365 | buffer[i] = convertPixelToRGB32<Format>(fetchPixel<BPP>(src, index + i)); |
366 | return buffer; |
367 | } |
368 | |
369 | template<QImage::Format Format> |
370 | static Q_ALWAYS_INLINE QRgba64 convertPixelToRGB64(uint s) |
371 | { |
372 | return QRgba64::fromArgb32(convertPixelToRGB32<Format>(s)); |
373 | } |
374 | |
375 | template<QImage::Format Format> |
376 | static const QRgba64 *QT_FASTCALL convertToRGB64(QRgba64 *buffer, const uint *src, int count, |
377 | const QVector<QRgb> *, QDitherInfo *) |
378 | { |
379 | for (int i = 0; i < count; ++i) |
380 | buffer[i] = convertPixelToRGB64<Format>(src[i]); |
381 | return buffer; |
382 | } |
383 | |
384 | template<QImage::Format Format> |
385 | static const QRgba64 *QT_FASTCALL fetchRGBToRGB64(QRgba64 *buffer, const uchar *src, int index, int count, |
386 | const QVector<QRgb> *, QDitherInfo *) |
387 | { |
388 | for (int i = 0; i < count; ++i) |
389 | buffer[i] = convertPixelToRGB64<Format>(fetchPixel<bitsPerPixel<Format>()>(src, index + i)); |
390 | return buffer; |
391 | } |
392 | |
393 | template<QImage::Format Format> |
394 | static Q_ALWAYS_INLINE uint convertPixelToARGB32PM(uint s) |
395 | { |
396 | Q_CONSTEXPR uint alphaMask = ((1 << alphaWidth<Format>()) - 1); |
397 | Q_CONSTEXPR uint redMask = ((1 << redWidth<Format>()) - 1); |
398 | Q_CONSTEXPR uint greenMask = ((1 << greenWidth<Format>()) - 1); |
399 | Q_CONSTEXPR uint blueMask = ((1 << blueWidth<Format>()) - 1); |
400 | |
401 | Q_CONSTEXPR uchar alphaLeftShift = 8 - alphaWidth<Format>(); |
402 | Q_CONSTEXPR uchar redLeftShift = 8 - redWidth<Format>(); |
403 | Q_CONSTEXPR uchar greenLeftShift = 8 - greenWidth<Format>(); |
404 | Q_CONSTEXPR uchar blueLeftShift = 8 - blueWidth<Format>(); |
405 | |
406 | Q_CONSTEXPR uchar alphaRightShift = 2 * alphaWidth<Format>() - 8; |
407 | Q_CONSTEXPR uchar redRightShift = 2 * redWidth<Format>() - 8; |
408 | Q_CONSTEXPR uchar greenRightShift = 2 * greenWidth<Format>() - 8; |
409 | Q_CONSTEXPR uchar blueRightShift = 2 * blueWidth<Format>() - 8; |
410 | |
411 | Q_CONSTEXPR bool mustMin = (alphaWidth<Format>() != redWidth<Format>()) || |
412 | (alphaWidth<Format>() != greenWidth<Format>()) || |
413 | (alphaWidth<Format>() != blueWidth<Format>()); |
414 | |
415 | uint alpha = (s >> alphaShift<Format>()) & alphaMask; |
416 | uint red = (s >> redShift<Format>()) & redMask; |
417 | uint green = (s >> greenShift<Format>()) & greenMask; |
418 | uint blue = (s >> blueShift<Format>()) & blueMask; |
419 | |
420 | alpha = (alpha << alphaLeftShift) | (alpha >> alphaRightShift); |
421 | red = (red << redLeftShift) | (red >> redRightShift); |
422 | green = (green << greenLeftShift) | (green >> greenRightShift); |
423 | blue = (blue << blueLeftShift) | (blue >> blueRightShift); |
424 | |
425 | if (mustMin) { |
426 | red = qMin(alpha, red); |
427 | green = qMin(alpha, green); |
428 | blue = qMin(alpha, blue); |
429 | } |
430 | |
431 | return (alpha << 24) | (red << 16) | (green << 8) | blue; |
432 | } |
433 | |
434 | template<QImage::Format Format> |
435 | static void QT_FASTCALL convertARGBPMToARGB32PM(uint *buffer, int count, const QVector<QRgb> *) |
436 | { |
437 | for (int i = 0; i < count; ++i) |
438 | buffer[i] = convertPixelToARGB32PM<Format>(buffer[i]); |
439 | } |
440 | |
441 | template<QImage::Format Format> |
442 | static const uint *QT_FASTCALL fetchARGBPMToARGB32PM(uint *buffer, const uchar *src, int index, int count, |
443 | const QVector<QRgb> *, QDitherInfo *) |
444 | { |
445 | constexpr QPixelLayout::BPP BPP = bitsPerPixel<Format>(); |
446 | #if defined(__SSE2__) && !defined(__SSSE3__) && QT_COMPILER_SUPPORTS_SSSE3 |
447 | if (BPP == QPixelLayout::BPP24 && qCpuHasFeature(SSSE3)) { |
448 | // With SSE2 can convertToRGB32 be vectorized, but it takes SSSE3 |
449 | // to vectorize the deforested version below. |
450 | fetchPixelsBPP24_ssse3(buffer, src, index, count); |
451 | convertARGBPMToARGB32PM<Format>(buffer, count, nullptr); |
452 | return buffer; |
453 | } |
454 | #endif |
455 | for (int i = 0; i < count; ++i) |
456 | buffer[i] = convertPixelToARGB32PM<Format>(fetchPixel<BPP>(src, index + i)); |
457 | return buffer; |
458 | } |
459 | |
460 | template<QImage::Format Format> |
461 | static Q_ALWAYS_INLINE QRgba64 convertPixelToRGBA64PM(uint s) |
462 | { |
463 | return QRgba64::fromArgb32(convertPixelToARGB32PM<Format>(s)); |
464 | } |
465 | |
466 | template<QImage::Format Format> |
467 | static const QRgba64 *QT_FASTCALL convertARGBPMToRGBA64PM(QRgba64 *buffer, const uint *src, int count, |
468 | const QVector<QRgb> *, QDitherInfo *) |
469 | { |
470 | for (int i = 0; i < count; ++i) |
471 | buffer[i] = convertPixelToRGB64<Format>(src[i]); |
472 | return buffer; |
473 | } |
474 | |
475 | template<QImage::Format Format> |
476 | static const QRgba64 *QT_FASTCALL fetchARGBPMToRGBA64PM(QRgba64 *buffer, const uchar *src, int index, int count, |
477 | const QVector<QRgb> *, QDitherInfo *) |
478 | { |
479 | constexpr QPixelLayout::BPP bpp = bitsPerPixel<Format>(); |
480 | for (int i = 0; i < count; ++i) |
481 | buffer[i] = convertPixelToRGBA64PM<Format>(fetchPixel<bpp>(src, index + i)); |
482 | return buffer; |
483 | } |
484 | |
485 | template<QImage::Format Format, bool fromRGB> |
486 | static void QT_FASTCALL storeRGBFromARGB32PM(uchar *dest, const uint *src, int index, int count, |
487 | const QVector<QRgb> *, QDitherInfo *dither) |
488 | { |
489 | Q_CONSTEXPR uchar rWidth = redWidth<Format>(); |
490 | Q_CONSTEXPR uchar gWidth = greenWidth<Format>(); |
491 | Q_CONSTEXPR uchar bWidth = blueWidth<Format>(); |
492 | constexpr QPixelLayout::BPP BPP = bitsPerPixel<Format>(); |
493 | |
494 | // RGB32 -> RGB888 is not a precision loss. |
495 | if (!dither || (rWidth == 8 && gWidth == 8 && bWidth == 8)) { |
496 | Q_CONSTEXPR uint rMask = (1 << redWidth<Format>()) - 1; |
497 | Q_CONSTEXPR uint gMask = (1 << greenWidth<Format>()) - 1; |
498 | Q_CONSTEXPR uint bMask = (1 << blueWidth<Format>()) - 1; |
499 | Q_CONSTEXPR uchar rRightShift = 24 - redWidth<Format>(); |
500 | Q_CONSTEXPR uchar gRightShift = 16 - greenWidth<Format>(); |
501 | Q_CONSTEXPR uchar bRightShift = 8 - blueWidth<Format>(); |
502 | |
503 | for (int i = 0; i < count; ++i) { |
504 | const uint c = fromRGB ? src[i] : qUnpremultiply(src[i]); |
505 | const uint r = ((c >> rRightShift) & rMask) << redShift<Format>(); |
506 | const uint g = ((c >> gRightShift) & gMask) << greenShift<Format>(); |
507 | const uint b = ((c >> bRightShift) & bMask) << blueShift<Format>(); |
508 | storePixel<BPP>(dest, index + i, r | g | b); |
509 | }; |
510 | } else { |
511 | // We do ordered dither by using a rounding conversion, but instead of |
512 | // adding half of input precision, we add the adjusted result from the |
513 | // bayer matrix before narrowing. |
514 | // Note: Rounding conversion in itself is different from the naive |
515 | // conversion we do above for non-dithering. |
516 | const uint *bayer_line = qt_bayer_matrix[dither->y & 15]; |
517 | for (int i = 0; i < count; ++i) { |
518 | const uint c = fromRGB ? src[i] : qUnpremultiply(src[i]); |
519 | const int d = bayer_line[(dither->x + i) & 15]; |
520 | const int dr = d - ((d + 1) >> rWidth); |
521 | const int dg = d - ((d + 1) >> gWidth); |
522 | const int db = d - ((d + 1) >> bWidth); |
523 | int r = qRed(c); |
524 | int g = qGreen(c); |
525 | int b = qBlue(c); |
526 | r = (r + ((dr - r) >> rWidth) + 1) >> (8 - rWidth); |
527 | g = (g + ((dg - g) >> gWidth) + 1) >> (8 - gWidth); |
528 | b = (b + ((db - b) >> bWidth) + 1) >> (8 - bWidth); |
529 | const uint s = (r << redShift<Format>()) |
530 | | (g << greenShift<Format>()) |
531 | | (b << blueShift<Format>()); |
532 | storePixel<BPP>(dest, index + i, s); |
533 | } |
534 | } |
535 | } |
536 | |
537 | template<QImage::Format Format, bool fromRGB> |
538 | static void QT_FASTCALL storeARGBPMFromARGB32PM(uchar *dest, const uint *src, int index, int count, |
539 | const QVector<QRgb> *, QDitherInfo *dither) |
540 | { |
541 | constexpr QPixelLayout::BPP BPP = bitsPerPixel<Format>(); |
542 | if (!dither) { |
543 | Q_CONSTEXPR uint aMask = (1 << alphaWidth<Format>()) - 1; |
544 | Q_CONSTEXPR uint rMask = (1 << redWidth<Format>()) - 1; |
545 | Q_CONSTEXPR uint gMask = (1 << greenWidth<Format>()) - 1; |
546 | Q_CONSTEXPR uint bMask = (1 << blueWidth<Format>()) - 1; |
547 | |
548 | Q_CONSTEXPR uchar aRightShift = 32 - alphaWidth<Format>(); |
549 | Q_CONSTEXPR uchar rRightShift = 24 - redWidth<Format>(); |
550 | Q_CONSTEXPR uchar gRightShift = 16 - greenWidth<Format>(); |
551 | Q_CONSTEXPR uchar bRightShift = 8 - blueWidth<Format>(); |
552 | |
553 | Q_CONSTEXPR uint aOpaque = aMask << alphaShift<Format>(); |
554 | for (int i = 0; i < count; ++i) { |
555 | const uint c = src[i]; |
556 | const uint a = fromRGB ? aOpaque : (((c >> aRightShift) & aMask) << alphaShift<Format>()); |
557 | const uint r = ((c >> rRightShift) & rMask) << redShift<Format>(); |
558 | const uint g = ((c >> gRightShift) & gMask) << greenShift<Format>(); |
559 | const uint b = ((c >> bRightShift) & bMask) << blueShift<Format>(); |
560 | storePixel<BPP>(dest, index + i, a | r | g | b); |
561 | }; |
562 | } else { |
563 | Q_CONSTEXPR uchar aWidth = alphaWidth<Format>(); |
564 | Q_CONSTEXPR uchar rWidth = redWidth<Format>(); |
565 | Q_CONSTEXPR uchar gWidth = greenWidth<Format>(); |
566 | Q_CONSTEXPR uchar bWidth = blueWidth<Format>(); |
567 | |
568 | const uint *bayer_line = qt_bayer_matrix[dither->y & 15]; |
569 | for (int i = 0; i < count; ++i) { |
570 | const uint c = src[i]; |
571 | const int d = bayer_line[(dither->x + i) & 15]; |
572 | const int da = d - ((d + 1) >> aWidth); |
573 | const int dr = d - ((d + 1) >> rWidth); |
574 | const int dg = d - ((d + 1) >> gWidth); |
575 | const int db = d - ((d + 1) >> bWidth); |
576 | int a = qAlpha(c); |
577 | int r = qRed(c); |
578 | int g = qGreen(c); |
579 | int b = qBlue(c); |
580 | if (fromRGB) |
581 | a = (1 << aWidth) - 1; |
582 | else |
583 | a = (a + ((da - a) >> aWidth) + 1) >> (8 - aWidth); |
584 | r = (r + ((dr - r) >> rWidth) + 1) >> (8 - rWidth); |
585 | g = (g + ((dg - g) >> gWidth) + 1) >> (8 - gWidth); |
586 | b = (b + ((db - b) >> bWidth) + 1) >> (8 - bWidth); |
587 | uint s = (a << alphaShift<Format>()) |
588 | | (r << redShift<Format>()) |
589 | | (g << greenShift<Format>()) |
590 | | (b << blueShift<Format>()); |
591 | storePixel<BPP>(dest, index + i, s); |
592 | } |
593 | } |
594 | } |
595 | |
596 | template<QImage::Format Format> |
597 | static void QT_FASTCALL rbSwap(uchar *dst, const uchar *src, int count) |
598 | { |
599 | Q_CONSTEXPR uchar aWidth = alphaWidth<Format>(); |
600 | Q_CONSTEXPR uchar aShift = alphaShift<Format>(); |
601 | Q_CONSTEXPR uchar rWidth = redWidth<Format>(); |
602 | Q_CONSTEXPR uchar rShift = redShift<Format>(); |
603 | Q_CONSTEXPR uchar gWidth = greenWidth<Format>(); |
604 | Q_CONSTEXPR uchar gShift = greenShift<Format>(); |
605 | Q_CONSTEXPR uchar bWidth = blueWidth<Format>(); |
606 | Q_CONSTEXPR uchar bShift = blueShift<Format>(); |
607 | #ifdef Q_COMPILER_CONSTEXPR |
608 | Q_STATIC_ASSERT(rWidth == bWidth); |
609 | #endif |
610 | Q_CONSTEXPR uint redBlueMask = (1 << rWidth) - 1; |
611 | Q_CONSTEXPR uint alphaGreenMask = (((1 << aWidth) - 1) << aShift) |
612 | | (((1 << gWidth) - 1) << gShift); |
613 | constexpr QPixelLayout::BPP bpp = bitsPerPixel<Format>(); |
614 | |
615 | for (int i = 0; i < count; ++i) { |
616 | const uint c = fetchPixel<bpp>(src, i); |
617 | const uint r = (c >> rShift) & redBlueMask; |
618 | const uint b = (c >> bShift) & redBlueMask; |
619 | const uint t = (c & alphaGreenMask) |
620 | | (r << bShift) |
621 | | (b << rShift); |
622 | storePixel<bpp>(dst, i, t); |
623 | } |
624 | } |
625 | |
626 | static void QT_FASTCALL rbSwap_rgb32(uchar *d, const uchar *s, int count) |
627 | { |
628 | const uint *src = reinterpret_cast<const uint *>(s); |
629 | uint *dest = reinterpret_cast<uint *>(d); |
630 | for (int i = 0; i < count; ++i) { |
631 | const uint c = src[i]; |
632 | const uint ag = c & 0xff00ff00; |
633 | const uint rb = c & 0x00ff00ff; |
634 | dest[i] = ag | (rb << 16) | (rb >> 16); |
635 | } |
636 | } |
637 | |
638 | #if Q_BYTE_ORDER == Q_LITTLE_ENDIAN |
639 | template<> |
640 | void QT_FASTCALL rbSwap<QImage::Format_RGBA8888>(uchar *d, const uchar *s, int count) |
641 | { |
642 | return rbSwap_rgb32(d, s, count); |
643 | } |
644 | #else |
645 | template<> |
646 | void QT_FASTCALL rbSwap<QImage::Format_RGBA8888>(uchar *d, const uchar *s, int count) |
647 | { |
648 | const uint *src = reinterpret_cast<const uint *>(s); |
649 | uint *dest = reinterpret_cast<uint *>(d); |
650 | for (int i = 0; i < count; ++i) { |
651 | const uint c = src[i]; |
652 | const uint rb = c & 0xff00ff00; |
653 | const uint ga = c & 0x00ff00ff; |
654 | dest[i] = ga | (rb << 16) | (rb >> 16); |
655 | } |
656 | } |
657 | #endif |
658 | |
659 | static void QT_FASTCALL rbSwap_rgb30(uchar *d, const uchar *s, int count) |
660 | { |
661 | const uint *src = reinterpret_cast<const uint *>(s); |
662 | uint *dest = reinterpret_cast<uint *>(d); |
663 | for (int i = 0; i < count; ++i) |
664 | dest[i] = qRgbSwapRgb30(src[i]); |
665 | } |
666 | |
// Builds the QPixelLayout entry for a packed RGB format, wiring in the
// Format-specialized helper templates (swap, convert, fetch and store).
// NOTE(review): QPixelLayout's declaration is not visible here; the meaning of
// the two leading booleans is inferred (they are `true` in pixelLayoutARGBPM) —
// confirm against the struct definition.
template<QImage::Format Format> Q_DECL_CONSTEXPR static inline QPixelLayout pixelLayoutRGB()
{
    return QPixelLayout{
        false, // presumably "has alpha channel" — confirm
        false, // presumably "premultiplied" — confirm
        bitsPerPixel<Format>(),
        rbSwap<Format>,
        convertToRGB32<Format>,
        convertToRGB64<Format>,
        fetchRGBToRGB32<Format>,
        fetchRGBToRGB64<Format>,
        storeRGBFromARGB32PM<Format, false>, // second template argument false
        storeRGBFromARGB32PM<Format, true>   // second template argument true
    };
}
682 | |
// Builds the QPixelLayout entry for a packed ARGB (premultiplied, per the
// helper names) format, wiring in the Format-specialized helper templates.
// NOTE(review): QPixelLayout's declaration is not visible here; the meaning of
// the two leading booleans is inferred — confirm against the struct definition.
template<QImage::Format Format> Q_DECL_CONSTEXPR static inline QPixelLayout pixelLayoutARGBPM()
{
    return QPixelLayout{
        true, // presumably "has alpha channel" — confirm
        true, // presumably "premultiplied" — confirm
        bitsPerPixel<Format>(),
        rbSwap<Format>,
        convertARGBPMToARGB32PM<Format>,
        convertARGBPMToRGBA64PM<Format>,
        fetchARGBPMToARGB32PM<Format>,
        fetchARGBPMToRGBA64PM<Format>,
        storeARGBPMFromARGB32PM<Format, false>, // second template argument false
        storeARGBPMFromARGB32PM<Format, true>   // second template argument true
    };
}
698 | |
699 | static void QT_FASTCALL convertIndexedToARGB32PM(uint *buffer, int count, const QVector<QRgb> *clut) |
700 | { |
701 | for (int i = 0; i < count; ++i) |
702 | buffer[i] = qPremultiply(clut->at(buffer[i])); |
703 | } |
704 | |
705 | template<QPixelLayout::BPP BPP> |
706 | static const uint *QT_FASTCALL fetchIndexedToARGB32PM(uint *buffer, const uchar *src, int index, int count, |
707 | const QVector<QRgb> *clut, QDitherInfo *) |
708 | { |
709 | for (int i = 0; i < count; ++i) { |
710 | const uint s = fetchPixel<BPP>(src, index + i); |
711 | buffer[i] = qPremultiply(clut->at(s)); |
712 | } |
713 | return buffer; |
714 | } |
715 | |
716 | template<QPixelLayout::BPP BPP> |
717 | static const QRgba64 *QT_FASTCALL fetchIndexedToRGBA64PM(QRgba64 *buffer, const uchar *src, int index, int count, |
718 | const QVector<QRgb> *clut, QDitherInfo *) |
719 | { |
720 | for (int i = 0; i < count; ++i) { |
721 | const uint s = fetchPixel<BPP>(src, index + i); |
722 | buffer[i] = QRgba64::fromArgb32(clut->at(s)).premultiplied(); |
723 | } |
724 | return buffer; |
725 | } |
726 | |
727 | static const QRgba64 *QT_FASTCALL convertIndexedToRGBA64PM(QRgba64 *buffer, const uint *src, int count, |
728 | const QVector<QRgb> *clut, QDitherInfo *) |
729 | { |
730 | for (int i = 0; i < count; ++i) |
731 | buffer[i] = QRgba64::fromArgb32(clut->at(src[i])).premultiplied(); |
732 | return buffer; |
733 | } |
734 | |
735 | static void QT_FASTCALL convertPassThrough(uint *, int, const QVector<QRgb> *) |
736 | { |
737 | } |
738 | |
739 | static const uint *QT_FASTCALL fetchPassThrough(uint *, const uchar *src, int index, int, |
740 | const QVector<QRgb> *, QDitherInfo *) |
741 | { |
742 | return reinterpret_cast<const uint *>(src) + index; |
743 | } |
744 | |
745 | static const QRgba64 *QT_FASTCALL fetchPassThrough64(QRgba64 *, const uchar *src, int index, int, |
746 | const QVector<QRgb> *, QDitherInfo *) |
747 | { |
748 | return reinterpret_cast<const QRgba64 *>(src) + index; |
749 | } |
750 | |
751 | static void QT_FASTCALL storePassThrough(uchar *dest, const uint *src, int index, int count, |
752 | const QVector<QRgb> *, QDitherInfo *) |
753 | { |
754 | uint *d = reinterpret_cast<uint *>(dest) + index; |
755 | if (d != src) |
756 | memcpy(d, src, count * sizeof(uint)); |
757 | } |
758 | |
759 | static void QT_FASTCALL convertARGB32ToARGB32PM(uint *buffer, int count, const QVector<QRgb> *) |
760 | { |
761 | qt_convertARGB32ToARGB32PM(buffer, buffer, count); |
762 | } |
763 | |
764 | static const uint *QT_FASTCALL fetchARGB32ToARGB32PM(uint *buffer, const uchar *src, int index, int count, |
765 | const QVector<QRgb> *, QDitherInfo *) |
766 | { |
767 | return qt_convertARGB32ToARGB32PM(buffer, reinterpret_cast<const uint *>(src) + index, count); |
768 | } |
769 | |
770 | static void QT_FASTCALL convertRGBA8888PMToARGB32PM(uint *buffer, int count, const QVector<QRgb> *) |
771 | { |
772 | for (int i = 0; i < count; ++i) |
773 | buffer[i] = RGBA2ARGB(buffer[i]); |
774 | } |
775 | |
776 | static const uint *QT_FASTCALL fetchRGBA8888PMToARGB32PM(uint *buffer, const uchar *src, int index, int count, |
777 | const QVector<QRgb> *, QDitherInfo *) |
778 | { |
779 | const uint *s = reinterpret_cast<const uint *>(src) + index; |
780 | UNALIASED_CONVERSION_LOOP(buffer, s, count, RGBA2ARGB); |
781 | return buffer; |
782 | } |
783 | |
784 | static void QT_FASTCALL convertRGBA8888ToARGB32PM(uint *buffer, int count, const QVector<QRgb> *) |
785 | { |
786 | qt_convertRGBA8888ToARGB32PM(buffer, buffer, count); |
787 | } |
788 | |
789 | static const uint *QT_FASTCALL fetchRGBA8888ToARGB32PM(uint *buffer, const uchar *src, int index, int count, |
790 | const QVector<QRgb> *, QDitherInfo *) |
791 | { |
792 | return qt_convertRGBA8888ToARGB32PM(buffer, reinterpret_cast<const uint *>(src) + index, count); |
793 | } |
794 | |
795 | static void QT_FASTCALL convertAlpha8ToRGB32(uint *buffer, int count, const QVector<QRgb> *) |
796 | { |
797 | for (int i = 0; i < count; ++i) |
798 | buffer[i] = qRgba(0, 0, 0, buffer[i]); |
799 | } |
800 | |
801 | static const uint *QT_FASTCALL fetchAlpha8ToRGB32(uint *buffer, const uchar *src, int index, int count, |
802 | const QVector<QRgb> *, QDitherInfo *) |
803 | { |
804 | for (int i = 0; i < count; ++i) |
805 | buffer[i] = qRgba(0, 0, 0, src[index + i]); |
806 | return buffer; |
807 | } |
808 | |
809 | static const QRgba64 *QT_FASTCALL convertAlpha8ToRGB64(QRgba64 *buffer, const uint *src, int count, |
810 | const QVector<QRgb> *, QDitherInfo *) |
811 | { |
812 | for (int i = 0; i < count; ++i) |
813 | buffer[i] = QRgba64::fromRgba(0, 0, 0, src[i]); |
814 | return buffer; |
815 | } |
816 | static const QRgba64 *QT_FASTCALL fetchAlpha8ToRGB64(QRgba64 *buffer, const uchar *src, int index, int count, |
817 | const QVector<QRgb> *, QDitherInfo *) |
818 | { |
819 | for (int i = 0; i < count; ++i) |
820 | buffer[i] = QRgba64::fromRgba(0, 0, 0, src[index + i]); |
821 | return buffer; |
822 | } |
823 | |
824 | static void QT_FASTCALL convertGrayscale8ToRGB32(uint *buffer, int count, const QVector<QRgb> *) |
825 | { |
826 | for (int i = 0; i < count; ++i) { |
827 | const uint s = buffer[i]; |
828 | buffer[i] = qRgb(s, s, s); |
829 | } |
830 | } |
831 | |
832 | static const uint *QT_FASTCALL fetchGrayscale8ToRGB32(uint *buffer, const uchar *src, int index, int count, |
833 | const QVector<QRgb> *, QDitherInfo *) |
834 | { |
835 | for (int i = 0; i < count; ++i) { |
836 | const uint s = src[index + i]; |
837 | buffer[i] = qRgb(s, s, s); |
838 | } |
839 | return buffer; |
840 | } |
841 | |
842 | static const QRgba64 *QT_FASTCALL convertGrayscale8ToRGB64(QRgba64 *buffer, const uint *src, int count, |
843 | const QVector<QRgb> *, QDitherInfo *) |
844 | { |
845 | for (int i = 0; i < count; ++i) |
846 | buffer[i] = QRgba64::fromRgba(src[i], src[i], src[i], 255); |
847 | return buffer; |
848 | } |
849 | |
850 | static const QRgba64 *QT_FASTCALL fetchGrayscale8ToRGB64(QRgba64 *buffer, const uchar *src, int index, int count, |
851 | const QVector<QRgb> *, QDitherInfo *) |
852 | { |
853 | for (int i = 0; i < count; ++i) { |
854 | const uint s = src[index + i]; |
855 | buffer[i] = QRgba64::fromRgba(s, s, s, 255); |
856 | } |
857 | return buffer; |
858 | } |
859 | |
860 | static void QT_FASTCALL convertGrayscale16ToRGB32(uint *buffer, int count, const QVector<QRgb> *) |
861 | { |
862 | for (int i = 0; i < count; ++i) { |
863 | const uint x = qt_div_257(buffer[i]); |
864 | buffer[i] = qRgb(x, x, x); |
865 | } |
866 | } |
867 | |
868 | static const uint *QT_FASTCALL fetchGrayscale16ToRGB32(uint *buffer, const uchar *src, int index, int count, |
869 | const QVector<QRgb> *, QDitherInfo *) |
870 | { |
871 | const unsigned short *s = reinterpret_cast<const unsigned short *>(src) + index; |
872 | for (int i = 0; i < count; ++i) { |
873 | const uint x = qt_div_257(s[i]); |
874 | buffer[i] = qRgb(x, x, x); |
875 | } |
876 | return buffer; |
877 | } |
878 | |
879 | static const QRgba64 *QT_FASTCALL convertGrayscale16ToRGBA64(QRgba64 *buffer, const uint *src, int count, |
880 | const QVector<QRgb> *, QDitherInfo *) |
881 | { |
882 | const unsigned short *s = reinterpret_cast<const unsigned short *>(src); |
883 | for (int i = 0; i < count; ++i) |
884 | buffer[i] = QRgba64::fromRgba64(s[i], s[i], s[i], 65535); |
885 | return buffer; |
886 | } |
887 | |
888 | static const QRgba64 *QT_FASTCALL fetchGrayscale16ToRGBA64(QRgba64 *buffer, const uchar *src, int index, int count, |
889 | const QVector<QRgb> *, QDitherInfo *) |
890 | { |
891 | const unsigned short *s = reinterpret_cast<const unsigned short *>(src) + index; |
892 | for (int i = 0; i < count; ++i) { |
893 | buffer[i] = QRgba64::fromRgba64(s[i], s[i], s[i], 65535); |
894 | } |
895 | return buffer; |
896 | } |
897 | |
898 | static void QT_FASTCALL storeARGB32FromARGB32PM(uchar *dest, const uint *src, int index, int count, |
899 | const QVector<QRgb> *, QDitherInfo *) |
900 | { |
901 | uint *d = reinterpret_cast<uint *>(dest) + index; |
902 | UNALIASED_CONVERSION_LOOP(d, src, count, [](uint c) { return qUnpremultiply(c); }); |
903 | } |
904 | |
905 | static void QT_FASTCALL storeRGBA8888PMFromARGB32PM(uchar *dest, const uint *src, int index, int count, |
906 | const QVector<QRgb> *, QDitherInfo *) |
907 | { |
908 | uint *d = reinterpret_cast<uint *>(dest) + index; |
909 | UNALIASED_CONVERSION_LOOP(d, src, count, ARGB2RGBA); |
910 | } |
911 | |
912 | #ifdef __SSE2__ |
// SSE2 expansion of `count` 32-bit pixels from `src` into QRgba64 values in
// `buffer`.
//   RGBA      - when true the scalar paths apply RGBA2ARGB before
//               QRgba64::fromArgb32, and the vector path skips the lane
//               shuffle below.
//   maskAlpha - when true 0xff000000 is OR'ed into every source pixel first.
template<bool RGBA, bool maskAlpha>
static inline void qConvertARGB32PMToRGBA64PM_sse2(QRgba64 *buffer, const uint *src, int count)
{
    if (count <= 0)
        return;

    const __m128i amask = _mm_set1_epi32(0xff000000);
    int i = 0;
    // Scalar prologue: advance until `buffer` is 16-byte aligned, as required
    // by the aligned stores (_mm_store_si128) in the vector loop.
    for (; ((uintptr_t)buffer & 0xf) && i < count; ++i) {
        uint s = *src++;
        if (maskAlpha)
            s = s | 0xff000000;
        if (RGBA)
            s = RGBA2ARGB(s);
        *buffer++ = QRgba64::fromArgb32(s);
    }
    // Vector loop: four pixels per iteration.
    for (; i < count-3; i += 4) {
        __m128i vs = _mm_loadu_si128((const __m128i*)src);
        if (maskAlpha)
            vs = _mm_or_si128(vs, amask);
        src += 4;
        // Interleaving the register with itself duplicates every byte, turning
        // each 8-bit channel b into the 16-bit value 0xbbbb.
        __m128i v1 = _mm_unpacklo_epi8(vs, vs);
        __m128i v2 = _mm_unpackhi_epi8(vs, vs);
        if (!RGBA) {
            // Exchange 16-bit lanes 0 and 2 of every pixel (lanes 1 and 3 stay).
            v1 = _mm_shufflelo_epi16(v1, _MM_SHUFFLE(3, 0, 1, 2));
            v2 = _mm_shufflelo_epi16(v2, _MM_SHUFFLE(3, 0, 1, 2));
            v1 = _mm_shufflehi_epi16(v1, _MM_SHUFFLE(3, 0, 1, 2));
            v2 = _mm_shufflehi_epi16(v2, _MM_SHUFFLE(3, 0, 1, 2));
        }
        _mm_store_si128((__m128i*)(buffer), v1);
        buffer += 2;
        _mm_store_si128((__m128i*)(buffer), v2);
        buffer += 2;
    }

    // Scalar tail for the pixels left over after the 4-wide loop
    // (SIMD_EPILOGUE is defined elsewhere; presumably a bounded loop).
    SIMD_EPILOGUE(i, count, 3) {
        uint s = *src++;
        if (maskAlpha)
            s = s | 0xff000000;
        if (RGBA)
            s = RGBA2ARGB(s);
        *buffer++ = QRgba64::fromArgb32(s);
    }
}
957 | |
// SSE2 conversion of `count` QRgba64 pixels from `buffer` into 32-bit pixels
// with 2 bits of alpha and 10 bits per color channel, written to `dest`.
// PixelOrder selects which of the first/third source channels ends up at bit 20.
// Pixels are handled in groups of 16 so the expensive per-pixel conversion
// (qConvertRgb64ToRgb30) is only used for groups with mixed alpha.
template<QtPixelOrder PixelOrder>
static inline void qConvertRGBA64PMToA2RGB30PM_sse2(uint *dest, const QRgba64 *buffer, int count)
{
    const __m128i gmask = _mm_set1_epi32(0x000ffc00); // bits 10..19
    const __m128i cmask = _mm_set1_epi32(0x000003ff); // bits 0..9
    int i = 0;
    __m128i vr, vg, vb, va;
    // Scalar prologue until `buffer` is 16-byte aligned (aligned loads below).
    for (; i < count && uintptr_t(buffer) & 0xF; ++i) {
        *dest++ = qConvertRgb64ToRgb30<PixelOrder>(*buffer++);
    }

    for (; i < count-15; i += 16) {
        // Repremultiplying is really expensive and hard to do in SIMD without AVX2,
        // so we try to avoid it by checking if it is needed 16 samples at a time.
        __m128i vOr = _mm_set1_epi32(0);
        __m128i vAnd = _mm_set1_epi32(0xffffffff);
        // Accumulate the bitwise OR and AND of all 16 pixels (two per register).
        for (int j = 0; j < 16; j += 2) {
            __m128i vs = _mm_load_si128((const __m128i*)(buffer + j));
            vOr = _mm_or_si128(vOr, vs);
            vAnd = _mm_and_si128(vAnd, vs);
        }
        // 16-bit lanes 3 and 7 are the top (alpha) word of the two pixels in a register.
        const quint16 orAlpha = ((uint)_mm_extract_epi16(vOr, 3)) | ((uint)_mm_extract_epi16(vOr, 7));
        const quint16 andAlpha = ((uint)_mm_extract_epi16(vAnd, 3)) & ((uint)_mm_extract_epi16(vAnd, 7));

        if (andAlpha == 0xffff) {
            // Every pixel in the group has alpha 0xffff: convert by plain bit
            // extraction, two pixels per iteration.
            for (int j = 0; j < 16; j += 2) {
                __m128i vs = _mm_load_si128((const __m128i*)buffer);
                buffer += 2;
                // Move the top 10 bits of each 16-bit color channel into place.
                vr = _mm_srli_epi64(vs, 6);
                vg = _mm_srli_epi64(vs, 16 + 6 - 10);
                vb = _mm_srli_epi64(vs, 32 + 6);
                vr = _mm_and_si128(vr, cmask);
                vg = _mm_and_si128(vg, gmask);
                vb = _mm_and_si128(vb, cmask);
                // Top 2 bits of the alpha word.
                va = _mm_srli_epi64(vs, 48 + 14);
                if (PixelOrder == PixelOrderRGB)
                    vr = _mm_slli_epi32(vr, 20);
                else
                    vb = _mm_slli_epi32(vb, 20);
                va = _mm_slli_epi32(va, 30);
                __m128i vd = _mm_or_si128(_mm_or_si128(vr, vg), _mm_or_si128(vb, va));
                // Results sit in 32-bit lanes 0 and 2; pack them into lanes 0 and 1
                // so the low 64 bits hold both output pixels.
                vd = _mm_shuffle_epi32(vd, _MM_SHUFFLE(3, 1, 2, 0));
                _mm_storel_epi64((__m128i*)dest, vd);
                dest += 2;
            }
        } else if (orAlpha == 0) {
            // Every pixel in the group has alpha 0: emit zeros.
            for (int j = 0; j < 16; ++j) {
                *dest++ = 0;
                buffer++;
            }
        } else {
            // Mixed alpha: fall back to the scalar conversion.
            for (int j = 0; j < 16; ++j)
                *dest++ = qConvertRgb64ToRgb30<PixelOrder>(*buffer++);
        }
    }

    // Scalar tail for the pixels left over after the 16-wide loop.
    SIMD_EPILOGUE(i, count, 15)
        *dest++ = qConvertRgb64ToRgb30<PixelOrder>(*buffer++);
}
1017 | #elif defined(__ARM_NEON__) |
// NEON expansion of `count` 32-bit pixels from `src` into QRgba64 values in
// `buffer`.
//   RGBA      - when true the scalar tail applies RGBA2ARGB before
//               QRgba64::fromArgb32, and the vector path skips the byte
//               permutation below.
//   maskAlpha - when true 0xff000000 is OR'ed into every source pixel first.
template<bool RGBA, bool maskAlpha>
static inline void qConvertARGB32PMToRGBA64PM_neon(QRgba64 *buffer, const uint *src, int count)
{
    if (count <= 0)
        return;

    const uint32x4_t amask = vdupq_n_u32(0xff000000);
    // Table-lookup indices that exchange bytes 0 and 2 of every 4-byte pixel.
#if defined(Q_PROCESSOR_ARM_64)
    const uint8x16_t rgbaMask = { 2, 1, 0, 3, 6, 5, 4, 7, 10, 9, 8, 11, 14, 13, 12, 15};
#else
    const uint8x8_t rgbaMask = { 2, 1, 0, 3, 6, 5, 4, 7 };
#endif
    int i = 0;
    // Vector loop: four pixels per iteration.
    for (; i < count-3; i += 4) {
        uint32x4_t vs32 = vld1q_u32(src);
        src += 4;
        if (maskAlpha)
            vs32 = vorrq_u32(vs32, amask);
        uint8x16_t vs8 = vreinterpretq_u8_u32(vs32);
        if (!RGBA) {
#if defined(Q_PROCESSOR_ARM_64)
            vs8 = vqtbl1q_u8(vs8, rgbaMask);
#else
            // no vqtbl1q_u8
            const uint8x8_t vlo = vtbl1_u8(vget_low_u8(vs8), rgbaMask);
            const uint8x8_t vhi = vtbl1_u8(vget_high_u8(vs8), rgbaMask);
            vs8 = vcombine_u8(vlo, vhi);
#endif
        }
        // Zipping the vector with itself duplicates every byte, turning each
        // 8-bit channel b into the 16-bit value 0xbbbb.
        uint8x16x2_t v = vzipq_u8(vs8, vs8);

        vst1q_u16((uint16_t *)buffer, vreinterpretq_u16_u8(v.val[0]));
        buffer += 2;
        vst1q_u16((uint16_t *)buffer, vreinterpretq_u16_u8(v.val[1]));
        buffer += 2;
    }

    // Scalar tail for the pixels left over after the 4-wide loop.
    SIMD_EPILOGUE(i, count, 3) {
        uint s = *src++;
        if (maskAlpha)
            s = s | 0xff000000;
        if (RGBA)
            s = RGBA2ARGB(s);
        *buffer++ = QRgba64::fromArgb32(s);
    }
}
1064 | #endif |
1065 | |
1066 | static const QRgba64 *QT_FASTCALL convertRGB32ToRGB64(QRgba64 *buffer, const uint *src, int count, |
1067 | const QVector<QRgb> *, QDitherInfo *) |
1068 | { |
1069 | #ifdef __SSE2__ |
1070 | qConvertARGB32PMToRGBA64PM_sse2<false, true>(buffer, src, count); |
1071 | #elif defined(__ARM_NEON__) |
1072 | qConvertARGB32PMToRGBA64PM_neon<false, true>(buffer, src, count); |
1073 | #else |
1074 | for (int i = 0; i < count; ++i) |
1075 | buffer[i] = QRgba64::fromArgb32(0xff000000 | src[i]); |
1076 | #endif |
1077 | return buffer; |
1078 | } |
1079 | |
1080 | static const QRgba64 *QT_FASTCALL fetchRGB32ToRGB64(QRgba64 *buffer, const uchar *src, int index, int count, |
1081 | const QVector<QRgb> *, QDitherInfo *) |
1082 | { |
1083 | return convertRGB32ToRGB64(buffer, reinterpret_cast<const uint *>(src) + index, count, nullptr, nullptr); |
1084 | } |
1085 | |
1086 | static const QRgba64 *QT_FASTCALL convertARGB32ToRGBA64PM(QRgba64 *buffer, const uint *src, int count, |
1087 | const QVector<QRgb> *, QDitherInfo *) |
1088 | { |
1089 | for (int i = 0; i < count; ++i) |
1090 | buffer[i] = QRgba64::fromArgb32(src[i]).premultiplied(); |
1091 | return buffer; |
1092 | } |
1093 | |
1094 | static const QRgba64 *QT_FASTCALL fetchARGB32ToRGBA64PM(QRgba64 *buffer, const uchar *src, int index, int count, |
1095 | const QVector<QRgb> *, QDitherInfo *) |
1096 | { |
1097 | return convertARGB32ToRGBA64PM(buffer, reinterpret_cast<const uint *>(src) + index, count, nullptr, nullptr); |
1098 | } |
1099 | |
1100 | static const QRgba64 *QT_FASTCALL convertARGB32PMToRGBA64PM(QRgba64 *buffer, const uint *src, int count, |
1101 | const QVector<QRgb> *, QDitherInfo *) |
1102 | { |
1103 | #ifdef __SSE2__ |
1104 | qConvertARGB32PMToRGBA64PM_sse2<false, false>(buffer, src, count); |
1105 | #elif defined(__ARM_NEON__) |
1106 | qConvertARGB32PMToRGBA64PM_neon<false, false>(buffer, src, count); |
1107 | #else |
1108 | for (int i = 0; i < count; ++i) |
1109 | buffer[i] = QRgba64::fromArgb32(src[i]); |
1110 | #endif |
1111 | return buffer; |
1112 | } |
1113 | |
1114 | static const QRgba64 *QT_FASTCALL fetchARGB32PMToRGBA64PM(QRgba64 *buffer, const uchar *src, int index, int count, |
1115 | const QVector<QRgb> *, QDitherInfo *) |
1116 | { |
1117 | return convertARGB32PMToRGBA64PM(buffer, reinterpret_cast<const uint *>(src) + index, count, nullptr, nullptr); |
1118 | } |
1119 | |
1120 | #if QT_CONFIG(raster_64bit) |
1121 | static void convertRGBA64ToRGBA64PM(QRgba64 *buffer, int count) |
1122 | { |
1123 | for (int i = 0; i < count; ++i) |
1124 | buffer[i] = buffer[i].premultiplied(); |
1125 | } |
1126 | |
1127 | static void convertRGBA64PMToRGBA64PM(QRgba64 *, int) |
1128 | { |
1129 | } |
1130 | #endif |
1131 | |
1132 | static const QRgba64 *QT_FASTCALL fetchRGBA64ToRGBA64PM(QRgba64 *buffer, const uchar *src, int index, int count, |
1133 | const QVector<QRgb> *, QDitherInfo *) |
1134 | { |
1135 | const QRgba64 *s = reinterpret_cast<const QRgba64 *>(src) + index; |
1136 | for (int i = 0; i < count; ++i) |
1137 | buffer[i] = QRgba64::fromRgba64(s[i]).premultiplied(); |
1138 | return buffer; |
1139 | } |
1140 | |
1141 | static const QRgba64 *QT_FASTCALL convertRGBA8888ToRGBA64PM(QRgba64 *buffer, const uint *src, int count, |
1142 | const QVector<QRgb> *, QDitherInfo *) |
1143 | { |
1144 | for (int i = 0; i < count; ++i) |
1145 | buffer[i] = QRgba64::fromArgb32(RGBA2ARGB(src[i])).premultiplied(); |
1146 | return buffer; |
1147 | } |
1148 | |
1149 | static const QRgba64 *QT_FASTCALL fetchRGBA8888ToRGBA64PM(QRgba64 *buffer, const uchar *src, int index, int count, |
1150 | const QVector<QRgb> *, QDitherInfo *) |
1151 | { |
1152 | return convertRGBA8888ToRGBA64PM(buffer, reinterpret_cast<const uint *>(src) + index, count, nullptr, nullptr); |
1153 | } |
1154 | |
1155 | static const QRgba64 *QT_FASTCALL convertRGBA8888PMToRGBA64PM(QRgba64 *buffer, const uint *src, int count, |
1156 | const QVector<QRgb> *, QDitherInfo *) |
1157 | { |
1158 | #ifdef __SSE2__ |
1159 | qConvertARGB32PMToRGBA64PM_sse2<true, false>(buffer, src, count); |
1160 | #elif defined(__ARM_NEON__) |
1161 | qConvertARGB32PMToRGBA64PM_neon<true, false>(buffer, src, count); |
1162 | #else |
1163 | for (int i = 0; i < count; ++i) |
1164 | buffer[i] = QRgba64::fromArgb32(RGBA2ARGB(src[i])); |
1165 | #endif |
1166 | return buffer; |
1167 | } |
1168 | |
1169 | static const QRgba64 *QT_FASTCALL fetchRGBA8888PMToRGBA64PM(QRgba64 *buffer, const uchar *src, int index, int count, |
1170 | const QVector<QRgb> *, QDitherInfo *) |
1171 | { |
1172 | return convertRGBA8888PMToRGBA64PM(buffer, reinterpret_cast<const uint *>(src) + index, count, nullptr, nullptr); |
1173 | } |
1174 | |
1175 | static void QT_FASTCALL storeRGBA8888FromARGB32PM(uchar *dest, const uint *src, int index, int count, |
1176 | const QVector<QRgb> *, QDitherInfo *) |
1177 | { |
1178 | uint *d = reinterpret_cast<uint *>(dest) + index; |
1179 | UNALIASED_CONVERSION_LOOP(d, src, count, [](uint c) { return ARGB2RGBA(qUnpremultiply(c)); }); |
1180 | } |
1181 | |
1182 | static void QT_FASTCALL storeRGBXFromRGB32(uchar *dest, const uint *src, int index, int count, |
1183 | const QVector<QRgb> *, QDitherInfo *) |
1184 | { |
1185 | uint *d = reinterpret_cast<uint *>(dest) + index; |
1186 | UNALIASED_CONVERSION_LOOP(d, src, count, [](uint c) { return ARGB2RGBA(0xff000000 | c); }); |
1187 | } |
1188 | |
1189 | static void QT_FASTCALL storeRGBXFromARGB32PM(uchar *dest, const uint *src, int index, int count, |
1190 | const QVector<QRgb> *, QDitherInfo *) |
1191 | { |
1192 | uint *d = reinterpret_cast<uint *>(dest) + index; |
1193 | UNALIASED_CONVERSION_LOOP(d, src, count, [](uint c) { return ARGB2RGBA(0xff000000 | qUnpremultiply(c)); }); |
1194 | } |
1195 | |
1196 | template<QtPixelOrder PixelOrder> |
1197 | static void QT_FASTCALL convertA2RGB30PMToARGB32PM(uint *buffer, int count, const QVector<QRgb> *) |
1198 | { |
1199 | for (int i = 0; i < count; ++i) |
1200 | buffer[i] = qConvertA2rgb30ToArgb32<PixelOrder>(buffer[i]); |
1201 | } |
1202 | |
1203 | template<QtPixelOrder PixelOrder> |
1204 | static const uint *QT_FASTCALL fetchA2RGB30PMToARGB32PM(uint *buffer, const uchar *s, int index, int count, |
1205 | const QVector<QRgb> *, QDitherInfo *dither) |
1206 | { |
1207 | const uint *src = reinterpret_cast<const uint *>(s) + index; |
1208 | if (!dither) { |
1209 | UNALIASED_CONVERSION_LOOP(buffer, src, count, qConvertA2rgb30ToArgb32<PixelOrder>); |
1210 | } else { |
1211 | for (int i = 0; i < count; ++i) { |
1212 | const uint c = src[i]; |
1213 | short d10 = (qt_bayer_matrix[dither->y & 15][(dither->x + i) & 15] << 2); |
1214 | short a10 = (c >> 30) * 0x155; |
1215 | short r10 = ((c >> 20) & 0x3ff); |
1216 | short g10 = ((c >> 10) & 0x3ff); |
1217 | short b10 = (c & 0x3ff); |
1218 | if (PixelOrder == PixelOrderBGR) |
1219 | std::swap(r10, b10); |
1220 | short a8 = (a10 + ((d10 - a10) >> 8)) >> 2; |
1221 | short r8 = (r10 + ((d10 - r10) >> 8)) >> 2; |
1222 | short g8 = (g10 + ((d10 - g10) >> 8)) >> 2; |
1223 | short b8 = (b10 + ((d10 - b10) >> 8)) >> 2; |
1224 | buffer[i] = qRgba(r8, g8, b8, a8); |
1225 | } |
1226 | } |
1227 | return buffer; |
1228 | } |
1229 | |
1230 | #ifdef __SSE2__ |
// SSE2 expansion of `count` 32-bit pixels (2-bit alpha, three 10-bit color
// channels) from `src` into QRgba64 values in `buffer`. PixelOrder selects
// whether the channel at bits 20..29 or the one at bits 0..9 becomes the
// first 16-bit word of the output.
template<QtPixelOrder PixelOrder>
static inline void qConvertA2RGB30PMToRGBA64PM_sse2(QRgba64 *buffer, const uint *src, int count)
{
    if (count <= 0)
        return;

    const __m128i rmask = _mm_set1_epi32(0x3ff00000); // bits 20..29
    const __m128i gmask = _mm_set1_epi32(0x000ffc00); // bits 10..19
    const __m128i bmask = _mm_set1_epi32(0x000003ff); // bits 0..9
    const __m128i afactor = _mm_set1_epi16(0x5555);   // 3 * 0x5555 == 0xffff
    int i = 0;

    // Scalar prologue until `buffer` is 16-byte aligned (aligned stores below).
    for (; ((uintptr_t)buffer & 0xf) && i < count; ++i)
        *buffer++ = qConvertA2rgb30ToRgb64<PixelOrder>(*src++);

    // Vector loop: four pixels per iteration.
    for (; i < count-3; i += 4) {
        __m128i vs = _mm_loadu_si128((const __m128i*)src);
        src += 4;
        __m128i va = _mm_srli_epi32(vs, 30);
        __m128i vr = _mm_and_si128(vs, rmask);
        __m128i vb = _mm_and_si128(vs, bmask);
        __m128i vg = _mm_and_si128(vs, gmask);
        // Expand the 2-bit alpha to 16 bits.
        va = _mm_mullo_epi16(va, afactor);
        // Expand each 10-bit channel c to 16 bits as (c << 6) | (c >> 4).
        vr = _mm_or_si128(_mm_srli_epi32(vr, 14), _mm_srli_epi32(vr, 24));
        vg = _mm_or_si128(_mm_srli_epi32(vg, 4), _mm_srli_epi32(vg, 14));
        vb = _mm_or_si128(_mm_slli_epi32(vb, 6), _mm_srli_epi32(vb, 4));
        // Pair channels into 32-bit lanes: a 2-byte register shift moves the
        // second channel into the upper 16 bits of each lane.
        __m128i vrb;
        if (PixelOrder == PixelOrderRGB)
            vrb = _mm_or_si128(vr, _mm_slli_si128(vb, 2));
        else
            vrb = _mm_or_si128(vb, _mm_slli_si128(vr, 2));
        __m128i vga = _mm_or_si128(vg, _mm_slli_si128(va, 2));
        // Interleave the two pairs to form four 16-bit words per pixel.
        _mm_store_si128((__m128i*)(buffer), _mm_unpacklo_epi16(vrb, vga));
        buffer += 2;
        _mm_store_si128((__m128i*)(buffer), _mm_unpackhi_epi16(vrb, vga));
        buffer += 2;
    }

    // Scalar tail for the pixels left over after the 4-wide loop.
    SIMD_EPILOGUE(i, count, 3)
        *buffer++ = qConvertA2rgb30ToRgb64<PixelOrder>(*src++);
}
1272 | #endif |
1273 | |
1274 | template<QtPixelOrder PixelOrder> |
1275 | static const QRgba64 *QT_FASTCALL convertA2RGB30PMToRGBA64PM(QRgba64 *buffer, const uint *src, int count, |
1276 | const QVector<QRgb> *, QDitherInfo *) |
1277 | { |
1278 | #ifdef __SSE2__ |
1279 | qConvertA2RGB30PMToRGBA64PM_sse2<PixelOrder>(buffer, src, count); |
1280 | #else |
1281 | for (int i = 0; i < count; ++i) |
1282 | buffer[i] = qConvertA2rgb30ToRgb64<PixelOrder>(src[i]); |
1283 | #endif |
1284 | return buffer; |
1285 | } |
1286 | |
1287 | template<QtPixelOrder PixelOrder> |
1288 | static const QRgba64 *QT_FASTCALL fetchA2RGB30PMToRGBA64PM(QRgba64 *buffer, const uchar *src, int index, int count, |
1289 | const QVector<QRgb> *, QDitherInfo *) |
1290 | { |
1291 | return convertA2RGB30PMToRGBA64PM<PixelOrder>(buffer, reinterpret_cast<const uint *>(src) + index, count, nullptr, nullptr); |
1292 | } |
1293 | |
1294 | template<QtPixelOrder PixelOrder> |
1295 | static void QT_FASTCALL storeA2RGB30PMFromARGB32PM(uchar *dest, const uint *src, int index, int count, |
1296 | const QVector<QRgb> *, QDitherInfo *) |
1297 | { |
1298 | uint *d = reinterpret_cast<uint *>(dest) + index; |
1299 | UNALIASED_CONVERSION_LOOP(d, src, count, qConvertArgb32ToA2rgb30<PixelOrder>); |
1300 | } |
1301 | |
1302 | template<QtPixelOrder PixelOrder> |
1303 | static void QT_FASTCALL storeRGB30FromRGB32(uchar *dest, const uint *src, int index, int count, |
1304 | const QVector<QRgb> *, QDitherInfo *) |
1305 | { |
1306 | uint *d = reinterpret_cast<uint *>(dest) + index; |
1307 | UNALIASED_CONVERSION_LOOP(d, src, count, qConvertRgb32ToRgb30<PixelOrder>); |
1308 | } |
1309 | |
1310 | template<QtPixelOrder PixelOrder> |
1311 | static void QT_FASTCALL storeRGB30FromARGB32PM(uchar *dest, const uint *src, int index, int count, |
1312 | const QVector<QRgb> *, QDitherInfo *) |
1313 | { |
1314 | uint *d = reinterpret_cast<uint *>(dest) + index; |
1315 | UNALIASED_CONVERSION_LOOP(d, src, count, qConvertRgb32ToRgb30<PixelOrder>); |
1316 | } |
1317 | |
// Converts `count` QRgba64 pixels from `src` into 32-bit pixels in `dst`.
// When RGBA is true the scalar paths additionally apply ARGB2RGBA and the
// SSE2 path skips the lane shuffle. Explicitly instantiated below for both
// values of RGBA.
template<bool RGBA>
void qt_convertRGBA64ToARGB32(uint *dst, const QRgba64 *src, int count)
{
    int i = 0;
#ifdef __SSE2__
    // Convert one pixel with scalar code if `dst` is not 8-byte aligned, so the
    // 64-bit stores below land on 8-byte boundaries.
    if (((uintptr_t)dst & 0x7) && count > 0) {
        uint s = (*src++).toArgb32();
        if (RGBA)
            s = ARGB2RGBA(s);
        *dst++ = s;
        i++;
    }
    const __m128i vhalf = _mm_set1_epi32(0x80);
    const __m128i vzero = _mm_setzero_si128();
    // Vector loop: two pixels (one 128-bit register) per iteration.
    for (; i < count-1; i += 2) {
        __m128i vs = _mm_loadu_si128((const __m128i*)src);
        src += 2;
        if (!RGBA) {
            // Exchange 16-bit lanes 0 and 2 of each pixel (lanes 1 and 3 stay).
            vs = _mm_shufflelo_epi16(vs, _MM_SHUFFLE(3, 0, 1, 2));
            vs = _mm_shufflehi_epi16(vs, _MM_SHUFFLE(3, 0, 1, 2));
        }
        // Widen every 16-bit channel to 32 bits.
        __m128i v1 = _mm_unpacklo_epi16(vs, vzero);
        __m128i v2 = _mm_unpackhi_epi16(vs, vzero);
        // Reduce each channel from 16 to 8 bits: t = x + 0x80;
        // result = (t - (t >> 8)) >> 8.
        v1 = _mm_add_epi32(v1, vhalf);
        v2 = _mm_add_epi32(v2, vhalf);
        v1 = _mm_sub_epi32(v1, _mm_srli_epi32(v1, 8));
        v2 = _mm_sub_epi32(v2, _mm_srli_epi32(v2, 8));
        v1 = _mm_srli_epi32(v1, 8);
        v2 = _mm_srli_epi32(v2, 8);
        // Narrow 32 -> 16 -> 8 bits; the low 64 bits then hold both pixels.
        v1 = _mm_packs_epi32(v1, v2);
        v1 = _mm_packus_epi16(v1, vzero);
        _mm_storel_epi64((__m128i*)(dst), v1);
        dst += 2;
    }
#endif
    // Scalar path: everything when SSE2 is unavailable, otherwise the
    // remaining pixel (if any).
    for (; i < count; i++) {
        uint s = (*src++).toArgb32();
        if (RGBA)
            s = ARGB2RGBA(s);
        *dst++ = s;
    }
}
template void qt_convertRGBA64ToARGB32<false>(uint *dst, const QRgba64 *src, int count);
template void qt_convertRGBA64ToARGB32<true>(uint *dst, const QRgba64 *src, int count);
1362 | |
1363 | |
1364 | static void QT_FASTCALL storeAlpha8FromARGB32PM(uchar *dest, const uint *src, int index, int count, |
1365 | const QVector<QRgb> *, QDitherInfo *) |
1366 | { |
1367 | for (int i = 0; i < count; ++i) |
1368 | dest[index + i] = qAlpha(src[i]); |
1369 | } |
1370 | |
1371 | static void QT_FASTCALL storeGrayscale8FromRGB32(uchar *dest, const uint *src, int index, int count, |
1372 | const QVector<QRgb> *, QDitherInfo *) |
1373 | { |
1374 | for (int i = 0; i < count; ++i) |
1375 | dest[index + i] = qGray(src[i]); |
1376 | } |
1377 | |
1378 | static void QT_FASTCALL storeGrayscale8FromARGB32PM(uchar *dest, const uint *src, int index, int count, |
1379 | const QVector<QRgb> *, QDitherInfo *) |
1380 | { |
1381 | for (int i = 0; i < count; ++i) |
1382 | dest[index + i] = qGray(qUnpremultiply(src[i])); |
1383 | } |
1384 | |
1385 | static void QT_FASTCALL storeGrayscale16FromRGB32(uchar *dest, const uint *src, int index, int count, |
1386 | const QVector<QRgb> *, QDitherInfo *) |
1387 | { |
1388 | unsigned short *d = reinterpret_cast<unsigned short *>(dest) + index; |
1389 | for (int i = 0; i < count; ++i) |
1390 | d[i] = qGray(src[i]) * 257; |
1391 | } |
1392 | |
1393 | static void QT_FASTCALL storeGrayscale16FromARGB32PM(uchar *dest, const uint *src, int index, int count, |
1394 | const QVector<QRgb> *, QDitherInfo *) |
1395 | { |
1396 | unsigned short *d = reinterpret_cast<unsigned short *>(dest) + index; |
1397 | for (int i = 0; i < count; ++i) |
1398 | d[i] = qGray(qUnpremultiply(src[i])) * 257; |
1399 | } |
1400 | |
1401 | static const uint *QT_FASTCALL fetchRGB64ToRGB32(uint *buffer, const uchar *src, int index, int count, |
1402 | const QVector<QRgb> *, QDitherInfo *) |
1403 | { |
1404 | const QRgba64 *s = reinterpret_cast<const QRgba64 *>(src) + index; |
1405 | for (int i = 0; i < count; ++i) |
1406 | buffer[i] = toArgb32(s[i]); |
1407 | return buffer; |
1408 | } |
1409 | |
1410 | static void QT_FASTCALL storeRGB64FromRGB32(uchar *dest, const uint *src, int index, int count, |
1411 | const QVector<QRgb> *, QDitherInfo *) |
1412 | { |
1413 | QRgba64 *d = reinterpret_cast<QRgba64 *>(dest) + index; |
1414 | for (int i = 0; i < count; ++i) |
1415 | d[i] = QRgba64::fromArgb32(src[i]); |
1416 | } |
1417 | |
1418 | static const uint *QT_FASTCALL fetchRGBA64ToARGB32PM(uint *buffer, const uchar *src, int index, int count, |
1419 | const QVector<QRgb> *, QDitherInfo *) |
1420 | { |
1421 | const QRgba64 *s = reinterpret_cast<const QRgba64 *>(src) + index; |
1422 | for (int i = 0; i < count; ++i) |
1423 | buffer[i] = toArgb32(s[i].premultiplied()); |
1424 | return buffer; |
1425 | } |
1426 | |
1427 | static void QT_FASTCALL storeRGBA64FromARGB32PM(uchar *dest, const uint *src, int index, int count, |
1428 | const QVector<QRgb> *, QDitherInfo *) |
1429 | { |
1430 | QRgba64 *d = reinterpret_cast<QRgba64 *>(dest) + index; |
1431 | for (int i = 0; i < count; ++i) |
1432 | d[i] = QRgba64::fromArgb32(src[i]).unpremultiplied(); |
1433 | } |
1434 | |
// Table of pixel layout descriptors with one entry per QImage::Format, in
// enum order (the trailing comment on each entry names the format).
// NOTE(review): each aggregate appears to list, in order: hasAlphaChannel,
// premultiplied, bpp, the red/blue swap function, the convert-to-ARGB32PM and
// convert-to-RGBA64PM functions, the fetch-to-ARGB32PM and fetch-to-RGBA64PM
// functions, and the store-from-ARGB32PM and store-from-RGB32 functions --
// confirm against the QPixelLayout declaration.
//
// Note:
// convertToArgb32() assumes that no color channel is less than 4 bits.
// storeRGBFromARGB32PM() assumes that no color channel is more than 8 bits.
// QImage::rgbSwapped() assumes that the red and blue color channels have the same number of bits.
QPixelLayout qPixelLayouts[QImage::NImageFormats] = {
    { false, false, QPixelLayout::BPPNone, nullptr, nullptr, nullptr, nullptr, nullptr, nullptr, nullptr }, // Format_Invalid
    { false, false, QPixelLayout::BPP1MSB, nullptr,
      convertIndexedToARGB32PM, convertIndexedToRGBA64PM,
      fetchIndexedToARGB32PM<QPixelLayout::BPP1MSB>, fetchIndexedToRGBA64PM<QPixelLayout::BPP1MSB>,
      nullptr, nullptr }, // Format_Mono
    { false, false, QPixelLayout::BPP1LSB, nullptr,
      convertIndexedToARGB32PM, convertIndexedToRGBA64PM,
      fetchIndexedToARGB32PM<QPixelLayout::BPP1LSB>, fetchIndexedToRGBA64PM<QPixelLayout::BPP1LSB>,
      nullptr, nullptr }, // Format_MonoLSB
    { false, false, QPixelLayout::BPP8, nullptr,
      convertIndexedToARGB32PM, convertIndexedToRGBA64PM,
      fetchIndexedToARGB32PM<QPixelLayout::BPP8>, fetchIndexedToRGBA64PM<QPixelLayout::BPP8>,
      nullptr, nullptr }, // Format_Indexed8
    // Technically using convertPassThrough to convert from ARGB32PM to RGB32 is wrong,
    // but everywhere this generic conversion would be wrong is currently overloaded.
    { false, false, QPixelLayout::BPP32, rbSwap_rgb32, convertPassThrough,
      convertRGB32ToRGB64, fetchPassThrough, fetchRGB32ToRGB64, storePassThrough, storePassThrough }, // Format_RGB32
    { true, false, QPixelLayout::BPP32, rbSwap_rgb32, convertARGB32ToARGB32PM,
      convertARGB32ToRGBA64PM, fetchARGB32ToARGB32PM, fetchARGB32ToRGBA64PM, storeARGB32FromARGB32PM, storePassThrough }, // Format_ARGB32
    { true, true, QPixelLayout::BPP32, rbSwap_rgb32, convertPassThrough,
      convertARGB32PMToRGBA64PM, fetchPassThrough, fetchARGB32PMToRGBA64PM, storePassThrough, storePassThrough }, // Format_ARGB32_Premultiplied
    // The next nine entries are produced by the pixelLayoutRGB<>/pixelLayoutARGBPM<>
    // helpers; they follow the QImage::Format order from Format_RGB16 to
    // Format_ARGB4444_Premultiplied.
    pixelLayoutRGB<QImage::Format_RGB16>(),
    pixelLayoutARGBPM<QImage::Format_ARGB8565_Premultiplied>(),
    pixelLayoutRGB<QImage::Format_RGB666>(),
    pixelLayoutARGBPM<QImage::Format_ARGB6666_Premultiplied>(),
    pixelLayoutRGB<QImage::Format_RGB555>(),
    pixelLayoutARGBPM<QImage::Format_ARGB8555_Premultiplied>(),
    pixelLayoutRGB<QImage::Format_RGB888>(),
    pixelLayoutRGB<QImage::Format_RGB444>(),
    pixelLayoutARGBPM<QImage::Format_ARGB4444_Premultiplied>(),
    { false, false, QPixelLayout::BPP32, rbSwap<QImage::Format_RGBA8888>, convertRGBA8888PMToARGB32PM,
      convertRGBA8888PMToRGBA64PM, fetchRGBA8888PMToARGB32PM, fetchRGBA8888PMToRGBA64PM, storeRGBXFromARGB32PM, storeRGBXFromRGB32 }, // Format_RGBX8888
    { true, false, QPixelLayout::BPP32, rbSwap<QImage::Format_RGBA8888>, convertRGBA8888ToARGB32PM,
      convertRGBA8888ToRGBA64PM, fetchRGBA8888ToARGB32PM, fetchRGBA8888ToRGBA64PM, storeRGBA8888FromARGB32PM, storeRGBXFromRGB32 }, // Format_RGBA8888
    { true, true, QPixelLayout::BPP32, rbSwap<QImage::Format_RGBA8888>, convertRGBA8888PMToARGB32PM,
      convertRGBA8888PMToRGBA64PM, fetchRGBA8888PMToARGB32PM, fetchRGBA8888PMToRGBA64PM, storeRGBA8888PMFromARGB32PM, storeRGBXFromRGB32 }, // Format_RGBA8888_Premultiplied
    { false, false, QPixelLayout::BPP32, rbSwap_rgb30,
      convertA2RGB30PMToARGB32PM<PixelOrderBGR>,
      convertA2RGB30PMToRGBA64PM<PixelOrderBGR>,
      fetchA2RGB30PMToARGB32PM<PixelOrderBGR>,
      fetchA2RGB30PMToRGBA64PM<PixelOrderBGR>,
      storeRGB30FromARGB32PM<PixelOrderBGR>,
      storeRGB30FromRGB32<PixelOrderBGR>
    }, // Format_BGR30
    { true, true, QPixelLayout::BPP32, rbSwap_rgb30,
      convertA2RGB30PMToARGB32PM<PixelOrderBGR>,
      convertA2RGB30PMToRGBA64PM<PixelOrderBGR>,
      fetchA2RGB30PMToARGB32PM<PixelOrderBGR>,
      fetchA2RGB30PMToRGBA64PM<PixelOrderBGR>,
      storeA2RGB30PMFromARGB32PM<PixelOrderBGR>,
      storeRGB30FromRGB32<PixelOrderBGR>
    }, // Format_A2BGR30_Premultiplied
    { false, false, QPixelLayout::BPP32, rbSwap_rgb30,
      convertA2RGB30PMToARGB32PM<PixelOrderRGB>,
      convertA2RGB30PMToRGBA64PM<PixelOrderRGB>,
      fetchA2RGB30PMToARGB32PM<PixelOrderRGB>,
      fetchA2RGB30PMToRGBA64PM<PixelOrderRGB>,
      storeRGB30FromARGB32PM<PixelOrderRGB>,
      storeRGB30FromRGB32<PixelOrderRGB>
    }, // Format_RGB30
    { true, true, QPixelLayout::BPP32, rbSwap_rgb30,
      convertA2RGB30PMToARGB32PM<PixelOrderRGB>,
      convertA2RGB30PMToRGBA64PM<PixelOrderRGB>,
      fetchA2RGB30PMToARGB32PM<PixelOrderRGB>,
      fetchA2RGB30PMToRGBA64PM<PixelOrderRGB>,
      storeA2RGB30PMFromARGB32PM<PixelOrderRGB>,
      storeRGB30FromRGB32<PixelOrderRGB>
    }, // Format_A2RGB30_Premultiplied
    { true, true, QPixelLayout::BPP8, nullptr,
      convertAlpha8ToRGB32, convertAlpha8ToRGB64,
      fetchAlpha8ToRGB32, fetchAlpha8ToRGB64,
      storeAlpha8FromARGB32PM, nullptr }, // Format_Alpha8
    { false, false, QPixelLayout::BPP8, nullptr,
      convertGrayscale8ToRGB32, convertGrayscale8ToRGB64,
      fetchGrayscale8ToRGB32, fetchGrayscale8ToRGB64,
      storeGrayscale8FromARGB32PM, storeGrayscale8FromRGB32 }, // Format_Grayscale8
    { false, false, QPixelLayout::BPP64, nullptr,
      convertPassThrough, nullptr,
      fetchRGB64ToRGB32, fetchPassThrough64,
      storeRGB64FromRGB32, storeRGB64FromRGB32 }, // Format_RGBX64
    { true, false, QPixelLayout::BPP64, nullptr,
      convertARGB32ToARGB32PM, nullptr,
      fetchRGBA64ToARGB32PM, fetchRGBA64ToRGBA64PM,
      storeRGBA64FromARGB32PM, storeRGB64FromRGB32 }, // Format_RGBA64
    { true, true, QPixelLayout::BPP64, nullptr,
      convertPassThrough, nullptr,
      fetchRGB64ToRGB32, fetchPassThrough64,
      storeRGB64FromRGB32, storeRGB64FromRGB32 }, // Format_RGBA64_Premultiplied
    { false, false, QPixelLayout::BPP16, nullptr,
      convertGrayscale16ToRGB32, convertGrayscale16ToRGBA64,
      fetchGrayscale16ToRGB32, fetchGrayscale16ToRGBA64,
      storeGrayscale16FromARGB32PM, storeGrayscale16FromRGB32 } // Format_Grayscale16
};

// Compile-time check that the table holds exactly one entry per image format.
Q_STATIC_ASSERT(sizeof(qPixelLayouts) / sizeof(*qPixelLayouts) == QImage::NImageFormats);
1535 | |
1536 | static void QT_FASTCALL convertFromRgb64(uint *dest, const QRgba64 *src, int length) |
1537 | { |
1538 | for (int i = 0; i < length; ++i) { |
1539 | dest[i] = toArgb32(src[i]); |
1540 | } |
1541 | } |
1542 | |
1543 | template<QImage::Format format> |
1544 | static void QT_FASTCALL storeGenericFromRGBA64PM(uchar *dest, const QRgba64 *src, int index, int count, |
1545 | const QVector<QRgb> *clut, QDitherInfo *dither) |
1546 | { |
1547 | uint buffer[BufferSize]; |
1548 | convertFromRgb64(buffer, src, count); |
1549 | qPixelLayouts[format].storeFromARGB32PM(dest, buffer, index, count, clut, dither); |
1550 | } |
1551 | |
1552 | static void QT_FASTCALL storeARGB32FromRGBA64PM(uchar *dest, const QRgba64 *src, int index, int count, |
1553 | const QVector<QRgb> *, QDitherInfo *) |
1554 | { |
1555 | uint *d = (uint*)dest + index; |
1556 | for (int i = 0; i < count; ++i) |
1557 | d[i] = toArgb32(src[i].unpremultiplied()); |
1558 | } |
1559 | |
1560 | static void QT_FASTCALL storeRGBA8888FromRGBA64PM(uchar *dest, const QRgba64 *src, int index, int count, |
1561 | const QVector<QRgb> *, QDitherInfo *) |
1562 | { |
1563 | uint *d = (uint*)dest + index; |
1564 | for (int i = 0; i < count; ++i) |
1565 | d[i] = toRgba8888(src[i].unpremultiplied()); |
1566 | } |
1567 | |
1568 | template<QtPixelOrder PixelOrder> |
1569 | static void QT_FASTCALL storeRGB30FromRGBA64PM(uchar *dest, const QRgba64 *src, int index, int count, |
1570 | const QVector<QRgb> *, QDitherInfo *) |
1571 | { |
1572 | uint *d = (uint*)dest + index; |
1573 | #ifdef __SSE2__ |
1574 | qConvertRGBA64PMToA2RGB30PM_sse2<PixelOrder>(d, src, count); |
1575 | #else |
1576 | for (int i = 0; i < count; ++i) |
1577 | d[i] = qConvertRgb64ToRgb30<PixelOrder>(src[i]); |
1578 | #endif |
1579 | } |
1580 | |
1581 | static void QT_FASTCALL storeRGBX64FromRGBA64PM(uchar *dest, const QRgba64 *src, int index, int count, |
1582 | const QVector<QRgb> *, QDitherInfo *) |
1583 | { |
1584 | QRgba64 *d = reinterpret_cast<QRgba64*>(dest) + index; |
1585 | for (int i = 0; i < count; ++i) { |
1586 | d[i] = src[i].unpremultiplied(); |
1587 | d[i].setAlpha(65535); |
1588 | } |
1589 | } |
1590 | |
1591 | static void QT_FASTCALL storeRGBA64FromRGBA64PM(uchar *dest, const QRgba64 *src, int index, int count, |
1592 | const QVector<QRgb> *, QDitherInfo *) |
1593 | { |
1594 | QRgba64 *d = reinterpret_cast<QRgba64*>(dest) + index; |
1595 | for (int i = 0; i < count; ++i) |
1596 | d[i] = src[i].unpremultiplied(); |
1597 | } |
1598 | |
1599 | static void QT_FASTCALL storeRGBA64PMFromRGBA64PM(uchar *dest, const QRgba64 *src, int index, int count, |
1600 | const QVector<QRgb> *, QDitherInfo *) |
1601 | { |
1602 | QRgba64 *d = reinterpret_cast<QRgba64*>(dest) + index; |
1603 | if (d != src) |
1604 | memcpy(d, src, count * sizeof(QRgba64)); |
1605 | } |
1606 | |
1607 | static void QT_FASTCALL storeGray16FromRGBA64PM(uchar *dest, const QRgba64 *src, int index, int count, |
1608 | const QVector<QRgb> *, QDitherInfo *) |
1609 | { |
1610 | quint16 *d = reinterpret_cast<quint16*>(dest) + index; |
1611 | for (int i = 0; i < count; ++i) { |
1612 | QRgba64 s = src[i].unpremultiplied(); |
1613 | d[i] = qGray(s.red(), s.green(), s.blue()); |
1614 | } |
1615 | } |
1616 | |
// Store functions that write premultiplied QRgba64 pixels to each image
// format, indexed by QImage::Format. Formats with no 64-bit store function
// have a nullptr entry. The per-entry format names below follow the same
// order as the comments in qPixelLayouts.
ConvertAndStorePixelsFunc64 qStoreFromRGBA64PM[QImage::NImageFormats] = {
    nullptr, // Format_Invalid
    nullptr, // Format_Mono
    nullptr, // Format_MonoLSB
    nullptr, // Format_Indexed8
    storeGenericFromRGBA64PM<QImage::Format_RGB32>,
    storeARGB32FromRGBA64PM, // Format_ARGB32
    storeGenericFromRGBA64PM<QImage::Format_ARGB32_Premultiplied>,
    storeGenericFromRGBA64PM<QImage::Format_RGB16>,
    storeGenericFromRGBA64PM<QImage::Format_ARGB8565_Premultiplied>,
    storeGenericFromRGBA64PM<QImage::Format_RGB666>,
    storeGenericFromRGBA64PM<QImage::Format_ARGB6666_Premultiplied>,
    storeGenericFromRGBA64PM<QImage::Format_RGB555>,
    storeGenericFromRGBA64PM<QImage::Format_ARGB8555_Premultiplied>,
    storeGenericFromRGBA64PM<QImage::Format_RGB888>,
    storeGenericFromRGBA64PM<QImage::Format_RGB444>,
    storeGenericFromRGBA64PM<QImage::Format_ARGB4444_Premultiplied>,
    storeGenericFromRGBA64PM<QImage::Format_RGBX8888>,
    storeRGBA8888FromRGBA64PM, // Format_RGBA8888
    storeGenericFromRGBA64PM<QImage::Format_RGBA8888_Premultiplied>,
    storeRGB30FromRGBA64PM<PixelOrderBGR>, // Format_BGR30
    storeRGB30FromRGBA64PM<PixelOrderBGR>, // Format_A2BGR30_Premultiplied
    storeRGB30FromRGBA64PM<PixelOrderRGB>, // Format_RGB30
    storeRGB30FromRGBA64PM<PixelOrderRGB>, // Format_A2RGB30_Premultiplied
    storeGenericFromRGBA64PM<QImage::Format_Alpha8>,
    storeGenericFromRGBA64PM<QImage::Format_Grayscale8>,
    storeRGBX64FromRGBA64PM, // Format_RGBX64
    storeRGBA64FromRGBA64PM, // Format_RGBA64
    storeRGBA64PMFromRGBA64PM, // Format_RGBA64_Premultiplied
    storeGray16FromRGBA64PM // Format_Grayscale16
};
1648 | |
1649 | /* |
1650 | Destination fetch. This is simple as we don't have to do bounds checks or |
1651 | transformations |
1652 | */ |
1653 | |
1654 | static uint * QT_FASTCALL destFetchMono(uint *buffer, QRasterBuffer *rasterBuffer, int x, int y, int length) |
1655 | { |
1656 | uchar *Q_DECL_RESTRICT data = (uchar *)rasterBuffer->scanLine(y); |
1657 | uint *start = buffer; |
1658 | const uint *end = buffer + length; |
1659 | while (buffer < end) { |
1660 | *buffer = data[x>>3] & (0x80 >> (x & 7)) ? rasterBuffer->destColor1 : rasterBuffer->destColor0; |
1661 | ++buffer; |
1662 | ++x; |
1663 | } |
1664 | return start; |
1665 | } |
1666 | |
1667 | static uint * QT_FASTCALL destFetchMonoLsb(uint *buffer, QRasterBuffer *rasterBuffer, int x, int y, int length) |
1668 | { |
1669 | uchar *Q_DECL_RESTRICT data = (uchar *)rasterBuffer->scanLine(y); |
1670 | uint *start = buffer; |
1671 | const uint *end = buffer + length; |
1672 | while (buffer < end) { |
1673 | *buffer = data[x>>3] & (0x1 << (x & 7)) ? rasterBuffer->destColor1 : rasterBuffer->destColor0; |
1674 | ++buffer; |
1675 | ++x; |
1676 | } |
1677 | return start; |
1678 | } |
1679 | |
1680 | static uint * QT_FASTCALL destFetchARGB32P(uint *, QRasterBuffer *rasterBuffer, int x, int y, int) |
1681 | { |
1682 | return (uint *)rasterBuffer->scanLine(y) + x; |
1683 | } |
1684 | |
1685 | static uint * QT_FASTCALL destFetchRGB16(uint *buffer, QRasterBuffer *rasterBuffer, int x, int y, int length) |
1686 | { |
1687 | const ushort *Q_DECL_RESTRICT data = (const ushort *)rasterBuffer->scanLine(y) + x; |
1688 | for (int i = 0; i < length; ++i) |
1689 | buffer[i] = qConvertRgb16To32(data[i]); |
1690 | return buffer; |
1691 | } |
1692 | |
1693 | static uint *QT_FASTCALL destFetch(uint *buffer, QRasterBuffer *rasterBuffer, int x, int y, int length) |
1694 | { |
1695 | const QPixelLayout *layout = &qPixelLayouts[rasterBuffer->format]; |
1696 | return const_cast<uint *>(layout->fetchToARGB32PM(buffer, rasterBuffer->scanLine(y), x, length, nullptr, nullptr)); |
1697 | } |
1698 | |
/*
    Destination fetch that reads nothing: returns \a buffer untouched and
    ignores the raster buffer, position and length arguments.
*/
static uint *QT_FASTCALL destFetchUndefined(uint *buffer, QRasterBuffer *, int, int, int)
{
    return buffer;
}
1703 | |
1704 | static DestFetchProc destFetchProc[QImage::NImageFormats] = |
1705 | { |
1706 | 0, // Format_Invalid |
1707 | destFetchMono, // Format_Mono, |
1708 | destFetchMonoLsb, // Format_MonoLSB |
1709 | 0, // Format_Indexed8 |
1710 | destFetchARGB32P, // Format_RGB32 |
1711 | destFetch, // Format_ARGB32, |
1712 | destFetchARGB32P, // Format_ARGB32_Premultiplied |
1713 | destFetchRGB16, // Format_RGB16 |
1714 | destFetch, // Format_ARGB8565_Premultiplied |
1715 | destFetch, // Format_RGB666 |
1716 | destFetch, // Format_ARGB6666_Premultiplied |
1717 | destFetch, // Format_RGB555 |
1718 | destFetch, // Format_ARGB8555_Premultiplied |
1719 | destFetch, // Format_RGB888 |
1720 | destFetch, // Format_RGB444 |
1721 | destFetch, // Format_ARGB4444_Premultiplied |
1722 | destFetch, // Format_RGBX8888 |
1723 | destFetch, // Format_RGBA8888 |
1724 | destFetch, // Format_RGBA8888_Premultiplied |
1725 | destFetch, // Format_BGR30 |
1726 | destFetch, // Format_A2BGR30_Premultiplied |
1727 | destFetch, // Format_RGB30 |
1728 | destFetch, // Format_A2RGB30_Premultiplied |
1729 | destFetch, // Format_Alpha8 |
1730 | destFetch, // Format_Grayscale8 |
1731 | destFetch, // Format_RGBX64 |
1732 | destFetch, // Format_RGBA64 |
1733 | destFetch, // Format_RGBA64_Premultiplied |
1734 | destFetch, // Format_Grayscale16 |
1735 | }; |
1736 | |
1737 | #if QT_CONFIG(raster_64bit) |
1738 | static QRgba64 *QT_FASTCALL destFetch64(QRgba64 *buffer, QRasterBuffer *rasterBuffer, int x, int y, int length) |
1739 | { |
1740 | const QPixelLayout *layout = &qPixelLayouts[rasterBuffer->format]; |
1741 | return const_cast<QRgba64 *>(layout->fetchToRGBA64PM(buffer, rasterBuffer->scanLine(y), x, length, nullptr, nullptr)); |
1742 | } |
1743 | |
1744 | static QRgba64 * QT_FASTCALL destFetchRGB64(QRgba64 *, QRasterBuffer *rasterBuffer, int x, int y, int) |
1745 | { |
1746 | return (QRgba64 *)rasterBuffer->scanLine(y) + x; |
1747 | } |
1748 | |
/*
    64-bit destination fetch that reads nothing: returns \a buffer untouched
    and ignores the raster buffer, position and length arguments.
*/
static QRgba64 * QT_FASTCALL destFetch64Undefined(QRgba64 *buffer, QRasterBuffer *, int, int, int)
{
    return buffer;
}
1753 | |
1754 | static DestFetchProc64 destFetchProc64[QImage::NImageFormats] = |
1755 | { |
1756 | 0, // Format_Invalid |
1757 | 0, // Format_Mono, |
1758 | 0, // Format_MonoLSB |
1759 | 0, // Format_Indexed8 |
1760 | destFetch64, // Format_RGB32 |
1761 | destFetch64, // Format_ARGB32, |
1762 | destFetch64, // Format_ARGB32_Premultiplied |
1763 | destFetch64, // Format_RGB16 |
1764 | destFetch64, // Format_ARGB8565_Premultiplied |
1765 | destFetch64, // Format_RGB666 |
1766 | destFetch64, // Format_ARGB6666_Premultiplied |
1767 | destFetch64, // Format_RGB555 |
1768 | destFetch64, // Format_ARGB8555_Premultiplied |
1769 | destFetch64, // Format_RGB888 |
1770 | destFetch64, // Format_RGB444 |
1771 | destFetch64, // Format_ARGB4444_Premultiplied |
1772 | destFetch64, // Format_RGBX8888 |
1773 | destFetch64, // Format_RGBA8888 |
1774 | destFetch64, // Format_RGBA8888_Premultiplied |
1775 | destFetch64, // Format_BGR30 |
1776 | destFetch64, // Format_A2BGR30_Premultiplied |
1777 | destFetch64, // Format_RGB30 |
1778 | destFetch64, // Format_A2RGB30_Premultiplied |
1779 | destFetch64, // Format_Alpha8 |
1780 | destFetch64, // Format_Grayscale8 |
1781 | destFetchRGB64, // Format_RGBX64 |
1782 | destFetch64, // Format_RGBA64 |
1783 | destFetchRGB64, // Format_RGBA64_Premultiplied |
1784 | destFetch64, // Format_Grayscale16 |
1785 | }; |
1786 | #endif |
1787 | |
1788 | /* |
1789 | Returns the color in the mono destination color table |
1790 | that is the "nearest" to /color/. |
1791 | */ |
1792 | static inline QRgb findNearestColor(QRgb color, QRasterBuffer *rbuf) |
1793 | { |
1794 | QRgb color_0 = qPremultiply(rbuf->destColor0); |
1795 | QRgb color_1 = qPremultiply(rbuf->destColor1); |
1796 | color = qPremultiply(color); |
1797 | |
1798 | int r = qRed(color); |
1799 | int g = qGreen(color); |
1800 | int b = qBlue(color); |
1801 | int rx, gx, bx; |
1802 | int dist_0, dist_1; |
1803 | |
1804 | rx = r - qRed(color_0); |
1805 | gx = g - qGreen(color_0); |
1806 | bx = b - qBlue(color_0); |
1807 | dist_0 = rx*rx + gx*gx + bx*bx; |
1808 | |
1809 | rx = r - qRed(color_1); |
1810 | gx = g - qGreen(color_1); |
1811 | bx = b - qBlue(color_1); |
1812 | dist_1 = rx*rx + gx*gx + bx*bx; |
1813 | |
1814 | if (dist_0 < dist_1) |
1815 | return color_0; |
1816 | return color_1; |
1817 | } |
1818 | |
1819 | /* |
1820 | Destination store. |
1821 | */ |
1822 | |
1823 | static void QT_FASTCALL destStoreMono(QRasterBuffer *rasterBuffer, int x, int y, const uint *buffer, int length) |
1824 | { |
1825 | uchar *Q_DECL_RESTRICT data = (uchar *)rasterBuffer->scanLine(y); |
1826 | if (rasterBuffer->monoDestinationWithClut) { |
1827 | for (int i = 0; i < length; ++i) { |
1828 | if (buffer[i] == rasterBuffer->destColor0) { |
1829 | data[x >> 3] &= ~(0x80 >> (x & 7)); |
1830 | } else if (buffer[i] == rasterBuffer->destColor1) { |
1831 | data[x >> 3] |= 0x80 >> (x & 7); |
1832 | } else if (findNearestColor(buffer[i], rasterBuffer) == rasterBuffer->destColor0) { |
1833 | data[x >> 3] &= ~(0x80 >> (x & 7)); |
1834 | } else { |
1835 | data[x >> 3] |= 0x80 >> (x & 7); |
1836 | } |
1837 | ++x; |
1838 | } |
1839 | } else { |
1840 | for (int i = 0; i < length; ++i) { |
1841 | if (qGray(buffer[i]) < int(qt_bayer_matrix[y & 15][x & 15])) |
1842 | data[x >> 3] |= 0x80 >> (x & 7); |
1843 | else |
1844 | data[x >> 3] &= ~(0x80 >> (x & 7)); |
1845 | ++x; |
1846 | } |
1847 | } |
1848 | } |
1849 | |
1850 | static void QT_FASTCALL destStoreMonoLsb(QRasterBuffer *rasterBuffer, int x, int y, const uint *buffer, int length) |
1851 | { |
1852 | uchar *Q_DECL_RESTRICT data = (uchar *)rasterBuffer->scanLine(y); |
1853 | if (rasterBuffer->monoDestinationWithClut) { |
1854 | for (int i = 0; i < length; ++i) { |
1855 | if (buffer[i] == rasterBuffer->destColor0) { |
1856 | data[x >> 3] &= ~(1 << (x & 7)); |
1857 | } else if (buffer[i] == rasterBuffer->destColor1) { |
1858 | data[x >> 3] |= 1 << (x & 7); |
1859 | } else if (findNearestColor(buffer[i], rasterBuffer) == rasterBuffer->destColor0) { |
1860 | data[x >> 3] &= ~(1 << (x & 7)); |
1861 | } else { |
1862 | data[x >> 3] |= 1 << (x & 7); |
1863 | } |
1864 | ++x; |
1865 | } |
1866 | } else { |
1867 | for (int i = 0; i < length; ++i) { |
1868 | if (qGray(buffer[i]) < int(qt_bayer_matrix[y & 15][x & 15])) |
1869 | data[x >> 3] |= 1 << (x & 7); |
1870 | else |
1871 | data[x >> 3] &= ~(1 << (x & 7)); |
1872 | ++x; |
1873 | } |
1874 | } |
1875 | } |
1876 | |
1877 | static void QT_FASTCALL destStoreRGB16(QRasterBuffer *rasterBuffer, int x, int y, const uint *buffer, int length) |
1878 | { |
1879 | quint16 *data = (quint16*)rasterBuffer->scanLine(y) + x; |
1880 | for (int i = 0; i < length; ++i) |
1881 | data[i] = qConvertRgb32To16(buffer[i]); |
1882 | } |
1883 | |
1884 | static void QT_FASTCALL destStore(QRasterBuffer *rasterBuffer, int x, int y, const uint *buffer, int length) |
1885 | { |
1886 | const QPixelLayout *layout = &qPixelLayouts[rasterBuffer->format]; |
1887 | ConvertAndStorePixelsFunc store = layout->storeFromARGB32PM; |
1888 | if (!layout->premultiplied && !layout->hasAlphaChannel) |
1889 | store = layout->storeFromRGB32; |
1890 | uchar *dest = rasterBuffer->scanLine(y); |
1891 | store(dest, buffer, x, length, nullptr, nullptr); |
1892 | } |
1893 | |
1894 | static DestStoreProc destStoreProc[QImage::NImageFormats] = |
1895 | { |
1896 | 0, // Format_Invalid |
1897 | destStoreMono, // Format_Mono, |
1898 | destStoreMonoLsb, // Format_MonoLSB |
1899 | 0, // Format_Indexed8 |
1900 | 0, // Format_RGB32 |
1901 | destStore, // Format_ARGB32, |
1902 | 0, // Format_ARGB32_Premultiplied |
1903 | destStoreRGB16, // Format_RGB16 |
1904 | destStore, // Format_ARGB8565_Premultiplied |
1905 | destStore, // Format_RGB666 |
1906 | destStore, // Format_ARGB6666_Premultiplied |
1907 | destStore, // Format_RGB555 |
1908 | destStore, // Format_ARGB8555_Premultiplied |
1909 | destStore, // Format_RGB888 |
1910 | destStore, // Format_RGB444 |
1911 | destStore, // Format_ARGB4444_Premultiplied |
1912 | destStore, // Format_RGBX8888 |
1913 | destStore, // Format_RGBA8888 |
1914 | destStore, // Format_RGBA8888_Premultiplied |
1915 | destStore, // Format_BGR30 |
1916 | destStore, // Format_A2BGR30_Premultiplied |
1917 | destStore, // Format_RGB30 |
1918 | destStore, // Format_A2RGB30_Premultiplied |
1919 | destStore, // Format_Alpha8 |
1920 | destStore, // Format_Grayscale8 |
1921 | destStore, // Format_RGBX64 |
1922 | destStore, // Format_RGBA64 |
1923 | destStore, // Format_RGBA64_Premultiplied |
1924 | destStore, // Format_Grayscale16 |
1925 | }; |
1926 | |
1927 | #if QT_CONFIG(raster_64bit) |
1928 | static void QT_FASTCALL destStore64(QRasterBuffer *rasterBuffer, int x, int y, const QRgba64 *buffer, int length) |
1929 | { |
1930 | auto store = qStoreFromRGBA64PM[rasterBuffer->format]; |
1931 | uchar *dest = rasterBuffer->scanLine(y); |
1932 | store(dest, buffer, x, length, nullptr, nullptr); |
1933 | } |
1934 | |
1935 | static void QT_FASTCALL destStore64RGBA64(QRasterBuffer *rasterBuffer, int x, int y, const QRgba64 *buffer, int length) |
1936 | { |
1937 | QRgba64 *dest = reinterpret_cast<QRgba64*>(rasterBuffer->scanLine(y)) + x; |
1938 | for (int i = 0; i < length; ++i) { |
1939 | dest[i] = buffer[i].unpremultiplied(); |
1940 | } |
1941 | } |
1942 | |
1943 | static DestStoreProc64 destStoreProc64[QImage::NImageFormats] = |
1944 | { |
1945 | 0, // Format_Invalid |
1946 | 0, // Format_Mono, |
1947 | 0, // Format_MonoLSB |
1948 | 0, // Format_Indexed8 |
1949 | destStore64, // Format_RGB32 |
1950 | destStore64, // Format_ARGB32, |
1951 | destStore64, // Format_ARGB32_Premultiplied |
1952 | destStore64, // Format_RGB16 |
1953 | destStore64, // Format_ARGB8565_Premultiplied |
1954 | destStore64, // Format_RGB666 |
1955 | destStore64, // Format_ARGB6666_Premultiplied |
1956 | destStore64, // Format_RGB555 |
1957 | destStore64, // Format_ARGB8555_Premultiplied |
1958 | destStore64, // Format_RGB888 |
1959 | destStore64, // Format_RGB444 |
1960 | destStore64, // Format_ARGB4444_Premultiplied |
1961 | destStore64, // Format_RGBX8888 |
1962 | destStore64, // Format_RGBA8888 |
1963 | destStore64, // Format_RGBA8888_Premultiplied |
1964 | destStore64, // Format_BGR30 |
1965 | destStore64, // Format_A2BGR30_Premultiplied |
1966 | destStore64, // Format_RGB30 |
1967 | destStore64, // Format_A2RGB30_Premultiplied |
1968 | destStore64, // Format_Alpha8 |
1969 | destStore64, // Format_Grayscale8 |
1970 | 0, // Format_RGBX64 |
1971 | destStore64RGBA64, // Format_RGBA64 |
1972 | 0, // Format_RGBA64_Premultiplied |
1973 | destStore64, // Format_Grayscale16 |
1974 | }; |
1975 | #endif |
1976 | |
1977 | /* |
1978 | Source fetches |
1979 | |
1980 | This is a bit more complicated, as we need several fetch routines for every surface type |
1981 | |
1982 | We need 5 fetch methods per surface type: |
1983 | untransformed |
1984 | transformed (tiled and not tiled) |
1985 | transformed bilinear (tiled and not tiled) |
1986 | |
1987 | We don't need bounds checks for untransformed, but we need them for the other ones. |
1988 | |
1989 | The generic implementation does pixel by pixel fetches |
1990 | */ |
1991 | |
/*
    The kinds of texture fetch: untransformed or transformed, tiled or not,
    with or without bilinear sampling. NBlendTypes is the number of kinds
    and must stay last.
*/
enum TextureBlendType {
    BlendUntransformed = 0,
    BlendTiled = 1,
    BlendTransformed = 2,
    BlendTransformedTiled = 3,
    BlendTransformedBilinear = 4,
    BlendTransformedBilinearTiled = 5,
    NBlendTypes = 6
};
2001 | |
2002 | static const uint *QT_FASTCALL fetchUntransformed(uint *buffer, const Operator *, |
2003 | const QSpanData *data, int y, int x, int length) |
2004 | { |
2005 | const QPixelLayout *layout = &qPixelLayouts[data->texture.format]; |
2006 | return layout->fetchToARGB32PM(buffer, data->texture.scanLine(y), x, length, data->texture.colorTable, nullptr); |
2007 | } |
2008 | |
2009 | static const uint *QT_FASTCALL fetchUntransformedARGB32PM(uint *, const Operator *, |
2010 | const QSpanData *data, int y, int x, int) |
2011 | { |
2012 | const uchar *scanLine = data->texture.scanLine(y); |
2013 | return reinterpret_cast<const uint *>(scanLine) + x; |
2014 | } |
2015 | |
2016 | static const uint *QT_FASTCALL fetchUntransformedRGB16(uint *buffer, const Operator *, |
2017 | const QSpanData *data, int y, int x, |
2018 | int length) |
2019 | { |
2020 | const quint16 *scanLine = (const quint16 *)data->texture.scanLine(y) + x; |
2021 | for (int i = 0; i < length; ++i) |
2022 | buffer[i] = qConvertRgb16To32(scanLine[i]); |
2023 | return buffer; |
2024 | } |
2025 | |
2026 | #if QT_CONFIG(raster_64bit) |
2027 | static const QRgba64 *QT_FASTCALL fetchUntransformed64(QRgba64 *buffer, const Operator *, |
2028 | const QSpanData *data, int y, int x, int length) |
2029 | { |
2030 | const QPixelLayout *layout = &qPixelLayouts[data->texture.format]; |
2031 | return layout->fetchToRGBA64PM(buffer, data->texture.scanLine(y), x, length, data->texture.colorTable, nullptr); |
2032 | } |
2033 | |
2034 | static const QRgba64 *QT_FASTCALL fetchUntransformedRGBA64PM(QRgba64 *, const Operator *, |
2035 | const QSpanData *data, int y, int x, int) |
2036 | { |
2037 | const uchar *scanLine = data->texture.scanLine(y); |
2038 | return reinterpret_cast<const QRgba64 *>(scanLine) + x; |
2039 | } |
2040 | #endif |
2041 | |
2042 | template<TextureBlendType blendType> |
2043 | inline void fetchTransformed_pixelBounds(int max, int l1, int l2, int &v) |
2044 | { |
2045 | Q_STATIC_ASSERT(blendType == BlendTransformed || blendType == BlendTransformedTiled); |
2046 | if (blendType == BlendTransformedTiled) { |
2047 | if (v < 0 || v >= max) { |
2048 | v %= max; |
2049 | if (v < 0) v += max; |
2050 | } |
2051 | } else { |
2052 | v = qBound(l1, v, l2); |
2053 | } |
2054 | } |
2055 | |
2056 | static inline bool canUseFastMatrixPath(const qreal cx, const qreal cy, const qsizetype length, const QSpanData *data) |
2057 | { |
2058 | if (Q_UNLIKELY(!data->fast_matrix)) |
2059 | return false; |
2060 | |
2061 | qreal fx = (data->m21 * cy + data->m11 * cx + data->dx) * fixed_scale; |
2062 | qreal fy = (data->m22 * cy + data->m12 * cx + data->dy) * fixed_scale; |
2063 | qreal minc = std::min(fx, fy); |
2064 | qreal maxc = std::max(fx, fy); |
2065 | fx += std::trunc(data->m11 * fixed_scale) * length; |
2066 | fy += std::trunc(data->m12 * fixed_scale) * length; |
2067 | minc = std::min(minc, std::min(fx, fy)); |
2068 | maxc = std::max(maxc, std::max(fx, fy)); |
2069 | |
2070 | return minc >= std::numeric_limits<int>::min() && maxc <= std::numeric_limits<int>::max(); |
2071 | } |
2072 | |
2073 | template<TextureBlendType blendType, QPixelLayout::BPP bpp, typename T> |
2074 | static void QT_FASTCALL fetchTransformed_fetcher(T *buffer, const QSpanData *data, |
2075 | int y, int x, int length) |
2076 | { |
2077 | Q_STATIC_ASSERT(blendType == BlendTransformed || blendType == BlendTransformedTiled); |
2078 | const QTextureData &image = data->texture; |
2079 | |
2080 | const qreal cx = x + qreal(0.5); |
2081 | const qreal cy = y + qreal(0.5); |
2082 | |
2083 | constexpr bool useFetch = (bpp < QPixelLayout::BPP32) && sizeof(T) == sizeof(uint); |
2084 | const QPixelLayout *layout = &qPixelLayouts[data->texture.format]; |
2085 | if (!useFetch) |
2086 | Q_ASSERT(layout->bpp == bpp); |
2087 | // When templated 'fetch' should be inlined at compile time: |
2088 | const FetchPixelFunc fetch = (bpp == QPixelLayout::BPPNone) ? qFetchPixel[layout->bpp] : FetchPixelFunc(fetchPixel<bpp>); |
2089 | |
2090 | if (canUseFastMatrixPath(cx, cy, length, data)) { |
2091 | // The increment pr x in the scanline |
2092 | int fdx = (int)(data->m11 * fixed_scale); |
2093 | int fdy = (int)(data->m12 * fixed_scale); |
2094 | |
2095 | int fx = int((data->m21 * cy |
2096 | + data->m11 * cx + data->dx) * fixed_scale); |
2097 | int fy = int((data->m22 * cy |
2098 | + data->m12 * cx + data->dy) * fixed_scale); |
2099 | |
2100 | if (fdy == 0) { // simple scale, no rotation or shear |
2101 | int py = (fy >> 16); |
2102 | fetchTransformed_pixelBounds<blendType>(image.height, image.y1, image.y2 - 1, py); |
2103 | const uchar *src = image.scanLine(py); |
2104 | |
2105 | int i = 0; |
2106 | if (blendType == BlendTransformed) { |
2107 | int fastLen = length; |
2108 | if (fdx > 0) |
2109 | fastLen = qMin(fastLen, int((qint64(image.x2 - 1) * fixed_scale - fx) / fdx)); |
2110 | else if (fdx < 0) |
2111 | fastLen = qMin(fastLen, int((qint64(image.x1) * fixed_scale - fx) / fdx)); |
2112 | |
2113 | for (; i < fastLen; ++i) { |
2114 | int x1 = (fx >> 16); |
2115 | int x2 = x1; |
2116 | fetchTransformed_pixelBounds<blendType>(image.width, image.x1, image.x2 - 1, x1); |
2117 | if (x1 == x2) |
2118 | break; |
2119 | if (useFetch) |
2120 | buffer[i] = fetch(src, x1); |
2121 | else |
2122 | buffer[i] = reinterpret_cast<const T*>(src)[x1]; |
2123 | fx += fdx; |
2124 | } |
2125 | |
2126 | for (; i < fastLen; ++i) { |
2127 | int px = (fx >> 16); |
2128 | if (useFetch) |
2129 | buffer[i] = fetch(src, px); |
2130 | else |
2131 | buffer[i] = reinterpret_cast<const T*>(src)[px]; |
2132 | fx += fdx; |
2133 | } |
2134 | } |
2135 | |
2136 | for (; i < length; ++i) { |
2137 | int px = (fx >> 16); |
2138 | fetchTransformed_pixelBounds<blendType>(image.width, image.x1, image.x2 - 1, px); |
2139 | if (useFetch) |
2140 | buffer[i] = fetch(src, px); |
2141 | else |
2142 | buffer[i] = reinterpret_cast<const T*>(src)[px]; |
2143 | fx += fdx; |
2144 | } |
2145 | } else { // rotation or shear |
2146 | int i = 0; |
2147 | if (blendType == BlendTransformed) { |
2148 | int fastLen = length; |
2149 | if (fdx > 0) |
2150 | fastLen = qMin(fastLen, int((qint64(image.x2 - 1) * fixed_scale - fx) / fdx)); |
2151 | else if (fdx < 0) |
2152 | fastLen = qMin(fastLen, int((qint64(image.x1) * fixed_scale - fx) / fdx)); |
2153 | if (fdy > 0) |
2154 | fastLen = qMin(fastLen, int((qint64(image.y2 - 1) * fixed_scale - fy) / fdy)); |
2155 | else if (fdy < 0) |
2156 | fastLen = qMin(fastLen, int((qint64(image.y1) * fixed_scale - fy) / fdy)); |
2157 | |
2158 | for (; i < fastLen; ++i) { |
2159 | int x1 = (fx >> 16); |
2160 | int y1 = (fy >> 16); |
2161 | int x2 = x1; |
2162 | int y2 = y1; |
2163 | fetchTransformed_pixelBounds<blendType>(image.width, image.x1, image.x2 - 1, x1); |
2164 | fetchTransformed_pixelBounds<blendType>(image.height, image.y1, image.y2 - 1, y1); |
2165 | if (x1 == x2 && y1 == y2) |
2166 | break; |
2167 | if (useFetch) |
2168 | buffer[i] = fetch(image.scanLine(y1), x1); |
2169 | else |
2170 | buffer[i] = reinterpret_cast<const T*>(image.scanLine(y1))[x1]; |
2171 | fx += fdx; |
2172 | fy += fdy; |
2173 | } |
2174 | |
2175 | for (; i < fastLen; ++i) { |
2176 | int px = (fx >> 16); |
2177 | int py = (fy >> 16); |
2178 | if (useFetch) |
2179 | buffer[i] = fetch(image.scanLine(py), px); |
2180 | else |
2181 | buffer[i] = reinterpret_cast<const T*>(image.scanLine(py))[px]; |
2182 | fx += fdx; |
2183 | fy += fdy; |
2184 | } |
2185 | } |
2186 | |
2187 | for (; i < length; ++i) { |
2188 | int px = (fx >> 16); |
2189 | int py = (fy >> 16); |
2190 | fetchTransformed_pixelBounds<blendType>(image.width, image.x1, image.x2 - 1, px); |
2191 | fetchTransformed_pixelBounds<blendType>(image.height, image.y1, image.y2 - 1, py); |
2192 | if (useFetch) |
2193 | buffer[i] = fetch(image.scanLine(py), px); |
2194 | else |
2195 | buffer[i] = reinterpret_cast<const T*>(image.scanLine(py))[px]; |
2196 | fx += fdx; |
2197 | fy += fdy; |
2198 | } |
2199 | } |
2200 | } else { |
2201 | const qreal fdx = data->m11; |
2202 | const qreal fdy = data->m12; |
2203 | const qreal fdw = data->m13; |
2204 | |
2205 | qreal fx = data->m21 * cy + data->m11 * cx + data->dx; |
2206 | qreal fy = data->m22 * cy + data->m12 * cx + data->dy; |
2207 | qreal fw = data->m23 * cy + data->m13 * cx + data->m33; |
2208 | |
2209 | T *const end = buffer + length; |
2210 | T *b = buffer; |
2211 | while (b < end) { |
2212 | const qreal iw = fw == 0 ? 1 : 1 / fw; |
2213 | const qreal tx = fx * iw; |
2214 | const qreal ty = fy * iw; |
2215 | int px = qFloor(tx); |
2216 | int py = qFloor(ty); |
2217 | |
2218 | fetchTransformed_pixelBounds<blendType>(image.height, image.y1, image.y2 - 1, py); |
2219 | fetchTransformed_pixelBounds<blendType>(image.width, image.x1, image.x2 - 1, px); |
2220 | if (useFetch) |
2221 | *b = fetch(image.scanLine(py), px); |
2222 | else |
2223 | *b = reinterpret_cast<const T*>(image.scanLine(py))[px]; |
2224 | |
2225 | fx += fdx; |
2226 | fy += fdy; |
2227 | fw += fdw; |
2228 | //force increment to avoid /0 |
2229 | if (!fw) { |
2230 | fw += fdw; |
2231 | } |
2232 | ++b; |
2233 | } |
2234 | } |
2235 | } |
2236 | |
2237 | template<TextureBlendType blendType, QPixelLayout::BPP bpp> |
2238 | static const uint *QT_FASTCALL fetchTransformed(uint *buffer, const Operator *, const QSpanData *data, |
2239 | int y, int x, int length) |
2240 | { |
2241 | Q_STATIC_ASSERT(blendType == BlendTransformed || blendType == BlendTransformedTiled); |
2242 | const QPixelLayout *layout = &qPixelLayouts[data->texture.format]; |
2243 | fetchTransformed_fetcher<blendType, bpp, uint>(buffer, data, y, x, length); |
2244 | layout->convertToARGB32PM(buffer, length, data->texture.colorTable); |
2245 | return buffer; |
2246 | } |
2247 | |
2248 | #if QT_CONFIG(raster_64bit) |
2249 | template<TextureBlendType blendType> /* either BlendTransformed or BlendTransformedTiled */ |
2250 | static const QRgba64 *QT_FASTCALL fetchTransformed64(QRgba64 *buffer, const Operator *, const QSpanData *data, |
2251 | int y, int x, int length) |
2252 | { |
2253 | const QPixelLayout *layout = &qPixelLayouts[data->texture.format]; |
2254 | if (layout->bpp != QPixelLayout::BPP64) { |
2255 | uint buffer32[BufferSize]; |
2256 | Q_ASSERT(length <= BufferSize); |
2257 | if (layout->bpp == QPixelLayout::BPP32) |
2258 | fetchTransformed_fetcher<blendType, QPixelLayout::BPP32, uint>(buffer32, data, y, x, length); |
2259 | else |
2260 | fetchTransformed_fetcher<blendType, QPixelLayout::BPPNone, uint>(buffer32, data, y, x, length); |
2261 | return layout->convertToRGBA64PM(buffer, buffer32, length, data->texture.colorTable, nullptr); |
2262 | } |
2263 | |
2264 | fetchTransformed_fetcher<blendType, QPixelLayout::BPP64, QRgba64>(buffer, data, y, x, length); |
2265 | if (data->texture.format == QImage::Format_RGBA64) |
2266 | convertRGBA64ToRGBA64PM(buffer, length); |
2267 | return buffer; |
2268 | } |
2269 | #endif |
2270 | |
/** \internal
  Bilinear blend of four argb pixels: top-left \a tl, top-right \a tr,
  bottom-left \a bl and bottom-right \a br.

  \a distx and \a disty are the horizontal and vertical interpolation
  factors and must be between 0 and 16. Within that range the four corner
  weights always add up to 16*16 = 256, which is why a shift by 8 (or a
  mask of the upper byte) brings every channel back to 8 bits.
*/
static inline uint interpolate_4_pixels_16(uint tl, uint tr, uint bl, uint br, uint distx, uint disty)
{
    const uint idistx = 16 - distx;
    const uint idisty = 16 - disty;

    // Weight of each corner.
    const uint weightTL = idistx * idisty;
    const uint weightTR = distx * idisty;
    const uint weightBL = idistx * disty;
    const uint weightBR = distx * disty;

    // Two channels are processed per multiplication: each one sits in the low
    // byte of a 16-bit half, leaving the high byte free for the product.
    const uint pairMask = 0x00ff00ff;

    const uint sumRB = (tl & pairMask) * weightTL
                     + (tr & pairMask) * weightTR
                     + (bl & pairMask) * weightBL
                     + (br & pairMask) * weightBR;

    const uint sumAG = ((tl >> 8) & pairMask) * weightTL
                     + ((tr >> 8) & pairMask) * weightTR
                     + ((bl >> 8) & pairMask) * weightBL
                     + ((br >> 8) & pairMask) * weightBR;

    // R/B results are in the high bytes and must be shifted down; A/G results
    // are already in their final position.
    return ((sumRB >> 8) & pairMask) | (sumAG & 0xff00ff00);
}
2290 | |
2291 | #if defined(__SSE2__) |
/*
    SSE2 form of the weighting done by interpolate_4_pixels_16(): blends the
    packed pixels in tl, tr, bl and br and writes the 128-bit result to b with
    an unaligned store (_mm_storeu_si128).

    All arithmetic is done on 16-bit lanes:
      - distx / disty hold the per-lane interpolation factors. The four
        weights computed below are
            idxidy = v_256 - 16*distx - 16*disty + distx*disty   (top-left)
            dxidy  = 16*distx - distx*disty                      (top-right)
            idxdy  = 16*disty - distx*disty                      (bottom-left)
            dxdy   = distx*disty                                 (bottom-right)
      - v_256 stands in for the 16*16 term, so it is expected to hold 256 in
        every 16-bit lane -- TODO confirm at the call sites.
      - colorMask is AND-ed with the pixels to obtain the "RB" halves and its
        complement is applied to the "AG" sum; presumably 0x00ff00ff per
        pixel -- TODO confirm at the call sites.

    NOTE: this expands to a bare { ... } block and mentions several of its
    arguments more than once, so arguments should be side-effect free.
*/
#define interpolate_4_pixels_16_sse2(tl, tr, bl, br, distx, disty, colorMask, v_256, b) \
{ \
    const __m128i dxdy = _mm_mullo_epi16 (distx, disty); \
    const __m128i distx_ = _mm_slli_epi16(distx, 4); \
    const __m128i disty_ = _mm_slli_epi16(disty, 4); \
    const __m128i idxidy = _mm_add_epi16(dxdy, _mm_sub_epi16(v_256, _mm_add_epi16(distx_, disty_))); \
    const __m128i dxidy = _mm_sub_epi16(distx_, dxdy); \
    const __m128i idxdy = _mm_sub_epi16(disty_, dxdy); \
 \
    __m128i tlAG = _mm_srli_epi16(tl, 8); \
    __m128i tlRB = _mm_and_si128(tl, colorMask); \
    __m128i trAG = _mm_srli_epi16(tr, 8); \
    __m128i trRB = _mm_and_si128(tr, colorMask); \
    __m128i blAG = _mm_srli_epi16(bl, 8); \
    __m128i blRB = _mm_and_si128(bl, colorMask); \
    __m128i brAG = _mm_srli_epi16(br, 8); \
    __m128i brRB = _mm_and_si128(br, colorMask); \
 \
    tlAG = _mm_mullo_epi16(tlAG, idxidy); \
    tlRB = _mm_mullo_epi16(tlRB, idxidy); \
    trAG = _mm_mullo_epi16(trAG, dxidy); \
    trRB = _mm_mullo_epi16(trRB, dxidy); \
    blAG = _mm_mullo_epi16(blAG, idxdy); \
    blRB = _mm_mullo_epi16(blRB, idxdy); \
    brAG = _mm_mullo_epi16(brAG, dxdy); \
    brRB = _mm_mullo_epi16(brRB, dxdy); \
 \
    /* Sum the weighted corners, then keep only the upper 8 bits of every 16-bit lane: AG stays in place (masked), RB is shifted down */ \
    __m128i rAG =_mm_add_epi16(_mm_add_epi16(tlAG, trAG), _mm_add_epi16(blAG, brAG)); \
    __m128i rRB =_mm_add_epi16(_mm_add_epi16(tlRB, trRB), _mm_add_epi16(blRB, brRB)); \
    rAG = _mm_andnot_si128(colorMask, rAG); \
    rRB = _mm_srli_epi16(rRB, 8); \
    _mm_storeu_si128((__m128i*)(b), _mm_or_si128(rAG, rRB)); \
}
2326 | #endif |
2327 | |
2328 | #if defined(__ARM_NEON__) |
/*
    NEON form of the weighting done by interpolate_4_pixels_16(): blends the
    packed pixels in tl, tr, bl and br (passed as int16x8_t) and stores the
    result to b with vst1q_s16.

    All arithmetic is done on 16-bit lanes. The four weights are
        idxidy = v_256 - distx_ - disty_ + distx*disty   (top-left)
        dxidy  = distx_ - distx*disty                    (top-right)
        idxdy  = disty_ - distx*disty                    (bottom-left)
        dxdy   = distx*disty                             (bottom-right)
    where distx_ is distx << 4, computed here.

    Unlike the SSE2 macro, disty_ is an argument rather than a local:
    presumably the caller passes disty << 4 -- TODO confirm at the call sites.
    v_256 is expected to hold 256 in every lane, colorMask to select the "RB"
    byte of each lane and invColorMask to be its complement -- TODO confirm at
    the call sites.

    NOTE: this expands to a bare { ... } block and mentions several of its
    arguments more than once, so arguments should be side-effect free.
*/
#define interpolate_4_pixels_16_neon(tl, tr, bl, br, distx, disty, disty_, colorMask, invColorMask, v_256, b) \
{ \
    const int16x8_t dxdy = vmulq_s16(distx, disty); \
    const int16x8_t distx_ = vshlq_n_s16(distx, 4); \
    const int16x8_t idxidy = vaddq_s16(dxdy, vsubq_s16(v_256, vaddq_s16(distx_, disty_))); \
    const int16x8_t dxidy = vsubq_s16(distx_, dxdy); \
    const int16x8_t idxdy = vsubq_s16(disty_, dxdy); \
 \
    int16x8_t tlAG = vreinterpretq_s16_u16(vshrq_n_u16(vreinterpretq_u16_s16(tl), 8)); \
    int16x8_t tlRB = vandq_s16(tl, colorMask); \
    int16x8_t trAG = vreinterpretq_s16_u16(vshrq_n_u16(vreinterpretq_u16_s16(tr), 8)); \
    int16x8_t trRB = vandq_s16(tr, colorMask); \
    int16x8_t blAG = vreinterpretq_s16_u16(vshrq_n_u16(vreinterpretq_u16_s16(bl), 8)); \
    int16x8_t blRB = vandq_s16(bl, colorMask); \
    int16x8_t brAG = vreinterpretq_s16_u16(vshrq_n_u16(vreinterpretq_u16_s16(br), 8)); \
    int16x8_t brRB = vandq_s16(br, colorMask); \
 \
    int16x8_t rAG = vmulq_s16(tlAG, idxidy); \
    int16x8_t rRB = vmulq_s16(tlRB, idxidy); \
    rAG = vmlaq_s16(rAG, trAG, dxidy); \
    rRB = vmlaq_s16(rRB, trRB, dxidy); \
    rAG = vmlaq_s16(rAG, blAG, idxdy); \
    rRB = vmlaq_s16(rRB, blRB, idxdy); \
    rAG = vmlaq_s16(rAG, brAG, dxdy); \
    rRB = vmlaq_s16(rRB, brRB, dxdy); \
 \
    rAG = vandq_s16(invColorMask, rAG); \
    rRB = vreinterpretq_s16_u16(vshrq_n_u16(vreinterpretq_u16_s16(rRB), 8)); \
    vst1q_s16((int16_t*)(b), vorrq_s16(rAG, rRB)); \
}
2359 | #endif |
2360 | |
// Primary template: declared only, with no generic definition in this part of
// the file. The behaviour comes from the explicit specializations that follow
// (BlendTransformedBilinearTiled and BlendTransformedBilinear).
// v1 and v2 are in/out references. In the tiled specialization max is the
// wrap-around modulus and l1/l2 are unused.
// NOTE(review): l1/l2 look like the lower/upper limits used by the non-tiled
// specialization -- confirm against its definition.
template<TextureBlendType blendType>
void fetchTransformedBilinear_pixelBounds(int max, int l1, int l2, int &v1, int &v2);
2363 | |
2364 | template<> |
2365 | inline void fetchTransformedBilinear_pixelBounds<BlendTransformedBilinearTiled>(int max, int, int, int &v1, int &v2) |
2366 | { |
2367 | v1 %= max; |
2368 | if (v1 < 0) |
2369 | v1 += max; |
2370 | v2 = v1 + 1; |
2371 | if (v2 == max) |
2372 | v2 = 0; |
2373 | Q_ASSERT(v1 >= 0 && v1 < max); |
2374 | Q_ASSERT(v2 >= 0 && v2 < max); |
2375 | } |
2376 | |
2377 | template<> |
2378 | inline void fetchTransformedBilinear_pixelBounds<BlendTransformedBilinear>(int, int l1, int l2, int &v1, int &v2) |
2379 | { |
2380 | if (v1 < l1) |
2381 | v2 = |
---|