1/****************************************************************************
2**
3** Copyright (C) 2018 The Qt Company Ltd.
4** Copyright (C) 2018 Intel Corporation.
5** Contact: https://www.qt.io/licensing/
6**
7** This file is part of the QtGui module of the Qt Toolkit.
8**
9** $QT_BEGIN_LICENSE:LGPL$
10** Commercial License Usage
11** Licensees holding valid commercial Qt licenses may use this file in
12** accordance with the commercial license agreement provided with the
13** Software or, alternatively, in accordance with the terms contained in
14** a written agreement between you and The Qt Company. For licensing terms
15** and conditions see https://www.qt.io/terms-conditions. For further
16** information use the contact form at https://www.qt.io/contact-us.
17**
18** GNU Lesser General Public License Usage
19** Alternatively, this file may be used under the terms of the GNU Lesser
20** General Public License version 3 as published by the Free Software
21** Foundation and appearing in the file LICENSE.LGPL3 included in the
22** packaging of this file. Please review the following information to
23** ensure the GNU Lesser General Public License version 3 requirements
24** will be met: https://www.gnu.org/licenses/lgpl-3.0.html.
25**
26** GNU General Public License Usage
27** Alternatively, this file may be used under the terms of the GNU
28** General Public License version 2.0 or (at your option) the GNU General
29** Public license version 3 or any later version approved by the KDE Free
30** Qt Foundation. The licenses are as published by the Free Software
31** Foundation and appearing in the file LICENSE.GPL2 and LICENSE.GPL3
32** included in the packaging of this file. Please review the following
33** information to ensure the GNU General Public License requirements will
34** be met: https://www.gnu.org/licenses/gpl-2.0.html and
35** https://www.gnu.org/licenses/gpl-3.0.html.
36**
37** $QT_END_LICENSE$
38**
39****************************************************************************/
40
41#include <qglobal.h>
42
43#include <qstylehints.h>
44#include <qguiapplication.h>
45#include <qatomic.h>
46#include <private/qcolortrclut_p.h>
47#include <private/qdrawhelper_p.h>
48#include <private/qpaintengine_raster_p.h>
49#include <private/qpainter_p.h>
50#include <private/qdrawhelper_x86_p.h>
51#include <private/qdrawingprimitive_sse2_p.h>
52#include <private/qdrawhelper_neon_p.h>
53#if defined(QT_COMPILER_SUPPORTS_MIPS_DSP) || defined(QT_COMPILER_SUPPORTS_MIPS_DSPR2)
54#include <private/qdrawhelper_mips_dsp_p.h>
55#endif
56#include <private/qguiapplication_p.h>
57#include <private/qrgba64_p.h>
58#include <qendian.h>
59#include <qloggingcategory.h>
60#include <qmath.h>
61
62QT_BEGIN_NAMESPACE
63
64Q_LOGGING_CATEGORY(lcQtGuiDrawHelper, "qt.gui.drawhelper")
65
66#define MASK(src, a) src = BYTE_MUL(src, a)
67
68/*
69 constants and structures
70*/
71
72enum {
73 fixed_scale = 1 << 16,
74 half_point = 1 << 15
75};
76
77template<QImage::Format> Q_DECL_CONSTEXPR uint redWidth();
78template<QImage::Format> Q_DECL_CONSTEXPR uint redShift();
79template<QImage::Format> Q_DECL_CONSTEXPR uint greenWidth();
80template<QImage::Format> Q_DECL_CONSTEXPR uint greenShift();
81template<QImage::Format> Q_DECL_CONSTEXPR uint blueWidth();
82template<QImage::Format> Q_DECL_CONSTEXPR uint blueShift();
83template<QImage::Format> Q_DECL_CONSTEXPR uint alphaWidth();
84template<QImage::Format> Q_DECL_CONSTEXPR uint alphaShift();
85
86template<> Q_DECL_CONSTEXPR uint redWidth<QImage::Format_RGB16>() { return 5; }
87template<> Q_DECL_CONSTEXPR uint redWidth<QImage::Format_RGB444>() { return 4; }
88template<> Q_DECL_CONSTEXPR uint redWidth<QImage::Format_RGB555>() { return 5; }
89template<> Q_DECL_CONSTEXPR uint redWidth<QImage::Format_RGB666>() { return 6; }
90template<> Q_DECL_CONSTEXPR uint redWidth<QImage::Format_RGB888>() { return 8; }
91template<> Q_DECL_CONSTEXPR uint redWidth<QImage::Format_ARGB4444_Premultiplied>() { return 4; }
92template<> Q_DECL_CONSTEXPR uint redWidth<QImage::Format_ARGB8555_Premultiplied>() { return 5; }
93template<> Q_DECL_CONSTEXPR uint redWidth<QImage::Format_ARGB8565_Premultiplied>() { return 5; }
94template<> Q_DECL_CONSTEXPR uint redWidth<QImage::Format_ARGB6666_Premultiplied>() { return 6; }
95template<> Q_DECL_CONSTEXPR uint redWidth<QImage::Format_RGBX8888>() { return 8; }
96template<> Q_DECL_CONSTEXPR uint redWidth<QImage::Format_RGBA8888>() { return 8; }
97template<> Q_DECL_CONSTEXPR uint redWidth<QImage::Format_RGBA8888_Premultiplied>() { return 8; }
98
99template<> Q_DECL_CONSTEXPR uint redShift<QImage::Format_RGB16>() { return 11; }
100template<> Q_DECL_CONSTEXPR uint redShift<QImage::Format_RGB444>() { return 8; }
101template<> Q_DECL_CONSTEXPR uint redShift<QImage::Format_RGB555>() { return 10; }
102template<> Q_DECL_CONSTEXPR uint redShift<QImage::Format_RGB666>() { return 12; }
103template<> Q_DECL_CONSTEXPR uint redShift<QImage::Format_RGB888>() { return 16; }
104template<> Q_DECL_CONSTEXPR uint redShift<QImage::Format_ARGB4444_Premultiplied>() { return 8; }
105template<> Q_DECL_CONSTEXPR uint redShift<QImage::Format_ARGB8555_Premultiplied>() { return 18; }
106template<> Q_DECL_CONSTEXPR uint redShift<QImage::Format_ARGB8565_Premultiplied>() { return 19; }
107template<> Q_DECL_CONSTEXPR uint redShift<QImage::Format_ARGB6666_Premultiplied>() { return 12; }
108#if Q_BYTE_ORDER == Q_BIG_ENDIAN
109template<> Q_DECL_CONSTEXPR uint redShift<QImage::Format_RGBX8888>() { return 24; }
110template<> Q_DECL_CONSTEXPR uint redShift<QImage::Format_RGBA8888>() { return 24; }
111template<> Q_DECL_CONSTEXPR uint redShift<QImage::Format_RGBA8888_Premultiplied>() { return 24; }
112#else
113template<> Q_DECL_CONSTEXPR uint redShift<QImage::Format_RGBX8888>() { return 0; }
114template<> Q_DECL_CONSTEXPR uint redShift<QImage::Format_RGBA8888>() { return 0; }
115template<> Q_DECL_CONSTEXPR uint redShift<QImage::Format_RGBA8888_Premultiplied>() { return 0; }
116#endif
117template<> Q_DECL_CONSTEXPR uint greenWidth<QImage::Format_RGB16>() { return 6; }
118template<> Q_DECL_CONSTEXPR uint greenWidth<QImage::Format_RGB444>() { return 4; }
119template<> Q_DECL_CONSTEXPR uint greenWidth<QImage::Format_RGB555>() { return 5; }
120template<> Q_DECL_CONSTEXPR uint greenWidth<QImage::Format_RGB666>() { return 6; }
121template<> Q_DECL_CONSTEXPR uint greenWidth<QImage::Format_RGB888>() { return 8; }
122template<> Q_DECL_CONSTEXPR uint greenWidth<QImage::Format_ARGB4444_Premultiplied>() { return 4; }
123template<> Q_DECL_CONSTEXPR uint greenWidth<QImage::Format_ARGB8555_Premultiplied>() { return 5; }
124template<> Q_DECL_CONSTEXPR uint greenWidth<QImage::Format_ARGB8565_Premultiplied>() { return 6; }
125template<> Q_DECL_CONSTEXPR uint greenWidth<QImage::Format_ARGB6666_Premultiplied>() { return 6; }
126template<> Q_DECL_CONSTEXPR uint greenWidth<QImage::Format_RGBX8888>() { return 8; }
127template<> Q_DECL_CONSTEXPR uint greenWidth<QImage::Format_RGBA8888>() { return 8; }
128template<> Q_DECL_CONSTEXPR uint greenWidth<QImage::Format_RGBA8888_Premultiplied>() { return 8; }
129
130template<> Q_DECL_CONSTEXPR uint greenShift<QImage::Format_RGB16>() { return 5; }
131template<> Q_DECL_CONSTEXPR uint greenShift<QImage::Format_RGB444>() { return 4; }
132template<> Q_DECL_CONSTEXPR uint greenShift<QImage::Format_RGB555>() { return 5; }
133template<> Q_DECL_CONSTEXPR uint greenShift<QImage::Format_RGB666>() { return 6; }
134template<> Q_DECL_CONSTEXPR uint greenShift<QImage::Format_RGB888>() { return 8; }
135template<> Q_DECL_CONSTEXPR uint greenShift<QImage::Format_ARGB4444_Premultiplied>() { return 4; }
136template<> Q_DECL_CONSTEXPR uint greenShift<QImage::Format_ARGB8555_Premultiplied>() { return 13; }
137template<> Q_DECL_CONSTEXPR uint greenShift<QImage::Format_ARGB8565_Premultiplied>() { return 13; }
138template<> Q_DECL_CONSTEXPR uint greenShift<QImage::Format_ARGB6666_Premultiplied>() { return 6; }
139#if Q_BYTE_ORDER == Q_BIG_ENDIAN
140template<> Q_DECL_CONSTEXPR uint greenShift<QImage::Format_RGBX8888>() { return 16; }
141template<> Q_DECL_CONSTEXPR uint greenShift<QImage::Format_RGBA8888>() { return 16; }
142template<> Q_DECL_CONSTEXPR uint greenShift<QImage::Format_RGBA8888_Premultiplied>() { return 16; }
143#else
144template<> Q_DECL_CONSTEXPR uint greenShift<QImage::Format_RGBX8888>() { return 8; }
145template<> Q_DECL_CONSTEXPR uint greenShift<QImage::Format_RGBA8888>() { return 8; }
146template<> Q_DECL_CONSTEXPR uint greenShift<QImage::Format_RGBA8888_Premultiplied>() { return 8; }
147#endif
148template<> Q_DECL_CONSTEXPR uint blueWidth<QImage::Format_RGB16>() { return 5; }
149template<> Q_DECL_CONSTEXPR uint blueWidth<QImage::Format_RGB444>() { return 4; }
150template<> Q_DECL_CONSTEXPR uint blueWidth<QImage::Format_RGB555>() { return 5; }
151template<> Q_DECL_CONSTEXPR uint blueWidth<QImage::Format_RGB666>() { return 6; }
152template<> Q_DECL_CONSTEXPR uint blueWidth<QImage::Format_RGB888>() { return 8; }
153template<> Q_DECL_CONSTEXPR uint blueWidth<QImage::Format_ARGB4444_Premultiplied>() { return 4; }
154template<> Q_DECL_CONSTEXPR uint blueWidth<QImage::Format_ARGB8555_Premultiplied>() { return 5; }
155template<> Q_DECL_CONSTEXPR uint blueWidth<QImage::Format_ARGB8565_Premultiplied>() { return 5; }
156template<> Q_DECL_CONSTEXPR uint blueWidth<QImage::Format_ARGB6666_Premultiplied>() { return 6; }
157template<> Q_DECL_CONSTEXPR uint blueWidth<QImage::Format_RGBX8888>() { return 8; }
158template<> Q_DECL_CONSTEXPR uint blueWidth<QImage::Format_RGBA8888>() { return 8; }
159template<> Q_DECL_CONSTEXPR uint blueWidth<QImage::Format_RGBA8888_Premultiplied>() { return 8; }
160
161template<> Q_DECL_CONSTEXPR uint blueShift<QImage::Format_RGB16>() { return 0; }
162template<> Q_DECL_CONSTEXPR uint blueShift<QImage::Format_RGB444>() { return 0; }
163template<> Q_DECL_CONSTEXPR uint blueShift<QImage::Format_RGB555>() { return 0; }
164template<> Q_DECL_CONSTEXPR uint blueShift<QImage::Format_RGB666>() { return 0; }
165template<> Q_DECL_CONSTEXPR uint blueShift<QImage::Format_RGB888>() { return 0; }
166template<> Q_DECL_CONSTEXPR uint blueShift<QImage::Format_ARGB4444_Premultiplied>() { return 0; }
167template<> Q_DECL_CONSTEXPR uint blueShift<QImage::Format_ARGB8555_Premultiplied>() { return 8; }
168template<> Q_DECL_CONSTEXPR uint blueShift<QImage::Format_ARGB8565_Premultiplied>() { return 8; }
169template<> Q_DECL_CONSTEXPR uint blueShift<QImage::Format_ARGB6666_Premultiplied>() { return 0; }
170#if Q_BYTE_ORDER == Q_BIG_ENDIAN
171template<> Q_DECL_CONSTEXPR uint blueShift<QImage::Format_RGBX8888>() { return 8; }
172template<> Q_DECL_CONSTEXPR uint blueShift<QImage::Format_RGBA8888>() { return 8; }
173template<> Q_DECL_CONSTEXPR uint blueShift<QImage::Format_RGBA8888_Premultiplied>() { return 8; }
174#else
175template<> Q_DECL_CONSTEXPR uint blueShift<QImage::Format_RGBX8888>() { return 16; }
176template<> Q_DECL_CONSTEXPR uint blueShift<QImage::Format_RGBA8888>() { return 16; }
177template<> Q_DECL_CONSTEXPR uint blueShift<QImage::Format_RGBA8888_Premultiplied>() { return 16; }
178#endif
179template<> Q_DECL_CONSTEXPR uint alphaWidth<QImage::Format_RGB16>() { return 0; }
180template<> Q_DECL_CONSTEXPR uint alphaWidth<QImage::Format_RGB444>() { return 0; }
181template<> Q_DECL_CONSTEXPR uint alphaWidth<QImage::Format_RGB555>() { return 0; }
182template<> Q_DECL_CONSTEXPR uint alphaWidth<QImage::Format_RGB666>() { return 0; }
183template<> Q_DECL_CONSTEXPR uint alphaWidth<QImage::Format_RGB888>() { return 0; }
184template<> Q_DECL_CONSTEXPR uint alphaWidth<QImage::Format_ARGB4444_Premultiplied>() { return 4; }
185template<> Q_DECL_CONSTEXPR uint alphaWidth<QImage::Format_ARGB8555_Premultiplied>() { return 8; }
186template<> Q_DECL_CONSTEXPR uint alphaWidth<QImage::Format_ARGB8565_Premultiplied>() { return 8; }
187template<> Q_DECL_CONSTEXPR uint alphaWidth<QImage::Format_ARGB6666_Premultiplied>() { return 6; }
188template<> Q_DECL_CONSTEXPR uint alphaWidth<QImage::Format_RGBX8888>() { return 0; }
189template<> Q_DECL_CONSTEXPR uint alphaWidth<QImage::Format_RGBA8888>() { return 8; }
190template<> Q_DECL_CONSTEXPR uint alphaWidth<QImage::Format_RGBA8888_Premultiplied>() { return 8; }
191
192template<> Q_DECL_CONSTEXPR uint alphaShift<QImage::Format_RGB16>() { return 0; }
193template<> Q_DECL_CONSTEXPR uint alphaShift<QImage::Format_RGB444>() { return 0; }
194template<> Q_DECL_CONSTEXPR uint alphaShift<QImage::Format_RGB555>() { return 0; }
195template<> Q_DECL_CONSTEXPR uint alphaShift<QImage::Format_RGB666>() { return 0; }
196template<> Q_DECL_CONSTEXPR uint alphaShift<QImage::Format_RGB888>() { return 0; }
197template<> Q_DECL_CONSTEXPR uint alphaShift<QImage::Format_ARGB4444_Premultiplied>() { return 12; }
198template<> Q_DECL_CONSTEXPR uint alphaShift<QImage::Format_ARGB8555_Premultiplied>() { return 0; }
199template<> Q_DECL_CONSTEXPR uint alphaShift<QImage::Format_ARGB8565_Premultiplied>() { return 0; }
200template<> Q_DECL_CONSTEXPR uint alphaShift<QImage::Format_ARGB6666_Premultiplied>() { return 18; }
201#if Q_BYTE_ORDER == Q_BIG_ENDIAN
202template<> Q_DECL_CONSTEXPR uint alphaShift<QImage::Format_RGBX8888>() { return 0; }
203template<> Q_DECL_CONSTEXPR uint alphaShift<QImage::Format_RGBA8888>() { return 0; }
204template<> Q_DECL_CONSTEXPR uint alphaShift<QImage::Format_RGBA8888_Premultiplied>() { return 0; }
205#else
206template<> Q_DECL_CONSTEXPR uint alphaShift<QImage::Format_RGBX8888>() { return 24; }
207template<> Q_DECL_CONSTEXPR uint alphaShift<QImage::Format_RGBA8888>() { return 24; }
208template<> Q_DECL_CONSTEXPR uint alphaShift<QImage::Format_RGBA8888_Premultiplied>() { return 24; }
209#endif
210
211template<QImage::Format> constexpr QPixelLayout::BPP bitsPerPixel();
212template<> constexpr QPixelLayout::BPP bitsPerPixel<QImage::Format_RGB16>() { return QPixelLayout::BPP16; }
213template<> constexpr QPixelLayout::BPP bitsPerPixel<QImage::Format_RGB444>() { return QPixelLayout::BPP16; }
214template<> constexpr QPixelLayout::BPP bitsPerPixel<QImage::Format_RGB555>() { return QPixelLayout::BPP16; }
215template<> constexpr QPixelLayout::BPP bitsPerPixel<QImage::Format_RGB666>() { return QPixelLayout::BPP24; }
216template<> constexpr QPixelLayout::BPP bitsPerPixel<QImage::Format_RGB888>() { return QPixelLayout::BPP24; }
217template<> constexpr QPixelLayout::BPP bitsPerPixel<QImage::Format_ARGB4444_Premultiplied>() { return QPixelLayout::BPP16; }
218template<> constexpr QPixelLayout::BPP bitsPerPixel<QImage::Format_ARGB8555_Premultiplied>() { return QPixelLayout::BPP24; }
219template<> constexpr QPixelLayout::BPP bitsPerPixel<QImage::Format_ARGB8565_Premultiplied>() { return QPixelLayout::BPP24; }
220template<> constexpr QPixelLayout::BPP bitsPerPixel<QImage::Format_ARGB6666_Premultiplied>() { return QPixelLayout::BPP24; }
221template<> constexpr QPixelLayout::BPP bitsPerPixel<QImage::Format_RGBX8888>() { return QPixelLayout::BPP32; }
222template<> constexpr QPixelLayout::BPP bitsPerPixel<QImage::Format_RGBA8888>() { return QPixelLayout::BPP32; }
223template<> constexpr QPixelLayout::BPP bitsPerPixel<QImage::Format_RGBA8888_Premultiplied>() { return QPixelLayout::BPP32; }
224
225
226typedef const uint *(QT_FASTCALL *FetchPixelsFunc)(uint *buffer, const uchar *src, int index, int count);
227
228template <QPixelLayout::BPP bpp> static
229uint QT_FASTCALL fetchPixel(const uchar *, int)
230{
231 Q_UNREACHABLE();
232 return 0;
233}
234
235template <>
236inline uint QT_FASTCALL fetchPixel<QPixelLayout::BPP1LSB>(const uchar *src, int index)
237{
238 return (src[index >> 3] >> (index & 7)) & 1;
239}
240
241template <>
242inline uint QT_FASTCALL fetchPixel<QPixelLayout::BPP1MSB>(const uchar *src, int index)
243{
244 return (src[index >> 3] >> (~index & 7)) & 1;
245}
246
247template <>
248inline uint QT_FASTCALL fetchPixel<QPixelLayout::BPP8>(const uchar *src, int index)
249{
250 return src[index];
251}
252
253template <>
254inline uint QT_FASTCALL fetchPixel<QPixelLayout::BPP16>(const uchar *src, int index)
255{
256 return reinterpret_cast<const quint16 *>(src)[index];
257}
258
259template <>
260inline uint QT_FASTCALL fetchPixel<QPixelLayout::BPP24>(const uchar *src, int index)
261{
262 return reinterpret_cast<const quint24 *>(src)[index];
263}
264
265template <>
266inline uint QT_FASTCALL fetchPixel<QPixelLayout::BPP32>(const uchar *src, int index)
267{
268 return reinterpret_cast<const uint *>(src)[index];
269}
270
271template <>
272inline uint QT_FASTCALL fetchPixel<QPixelLayout::BPP64>(const uchar *src, int index)
273{
274 // We have to do the conversion in fetch to fit into a 32bit uint
275 QRgba64 c = reinterpret_cast<const QRgba64 *>(src)[index];
276 return c.toArgb32();
277}
278
279template <QPixelLayout::BPP bpp>
280static quint64 QT_FASTCALL fetchPixel64(const uchar *src, int index)
281{
282 Q_STATIC_ASSERT(bpp != QPixelLayout::BPP64);
283 return fetchPixel<bpp>(src, index);
284}
285
286template <QPixelLayout::BPP width> static
287void QT_FASTCALL storePixel(uchar *dest, int index, uint pixel);
288
289template <>
290inline void QT_FASTCALL storePixel<QPixelLayout::BPP16>(uchar *dest, int index, uint pixel)
291{
292 reinterpret_cast<quint16 *>(dest)[index] = quint16(pixel);
293}
294
295template <>
296inline void QT_FASTCALL storePixel<QPixelLayout::BPP24>(uchar *dest, int index, uint pixel)
297{
298 reinterpret_cast<quint24 *>(dest)[index] = quint24(pixel);
299}
300
301typedef uint (QT_FASTCALL *FetchPixelFunc)(const uchar *src, int index);
302
303static const FetchPixelFunc qFetchPixel[QPixelLayout::BPPCount] = {
304 0, // BPPNone
305 fetchPixel<QPixelLayout::BPP1MSB>, // BPP1MSB
306 fetchPixel<QPixelLayout::BPP1LSB>, // BPP1LSB
307 fetchPixel<QPixelLayout::BPP8>, // BPP8
308 fetchPixel<QPixelLayout::BPP16>, // BPP16
309 fetchPixel<QPixelLayout::BPP24>, // BPP24
310 fetchPixel<QPixelLayout::BPP32>, // BPP32
311 fetchPixel<QPixelLayout::BPP64> // BPP64
312};
313
314template<QImage::Format Format>
315static Q_ALWAYS_INLINE uint convertPixelToRGB32(uint s)
316{
317 Q_CONSTEXPR uint redMask = ((1 << redWidth<Format>()) - 1);
318 Q_CONSTEXPR uint greenMask = ((1 << greenWidth<Format>()) - 1);
319 Q_CONSTEXPR uint blueMask = ((1 << blueWidth<Format>()) - 1);
320
321 Q_CONSTEXPR uchar redLeftShift = 8 - redWidth<Format>();
322 Q_CONSTEXPR uchar greenLeftShift = 8 - greenWidth<Format>();
323 Q_CONSTEXPR uchar blueLeftShift = 8 - blueWidth<Format>();
324
325 Q_CONSTEXPR uchar redRightShift = 2 * redWidth<Format>() - 8;
326 Q_CONSTEXPR uchar greenRightShift = 2 * greenWidth<Format>() - 8;
327 Q_CONSTEXPR uchar blueRightShift = 2 * blueWidth<Format>() - 8;
328
329 uint red = (s >> redShift<Format>()) & redMask;
330 uint green = (s >> greenShift<Format>()) & greenMask;
331 uint blue = (s >> blueShift<Format>()) & blueMask;
332
333 red = ((red << redLeftShift) | (red >> redRightShift)) << 16;
334 green = ((green << greenLeftShift) | (green >> greenRightShift)) << 8;
335 blue = (blue << blueLeftShift) | (blue >> blueRightShift);
336 return 0xff000000 | red | green | blue;
337}
338
339template<QImage::Format Format>
340static void QT_FASTCALL convertToRGB32(uint *buffer, int count, const QVector<QRgb> *)
341{
342 for (int i = 0; i < count; ++i)
343 buffer[i] = convertPixelToRGB32<Format>(buffer[i]);
344}
345
#if defined(__SSE2__) && !defined(__SSSE3__) && QT_COMPILER_SUPPORTS_SSSE3
// SSSE3 fetch of 24-bit pixels; the definition is not in this part of the
// file. Callers below guard it with a runtime qCpuHasFeature(SSSE3) check.
extern const uint *QT_FASTCALL fetchPixelsBPP24_ssse3(uint *dest, const uchar *src, int index, int count);
#endif
349
350template<QImage::Format Format>
351static const uint *QT_FASTCALL fetchRGBToRGB32(uint *buffer, const uchar *src, int index, int count,
352 const QVector<QRgb> *, QDitherInfo *)
353{
354 constexpr QPixelLayout::BPP BPP = bitsPerPixel<Format>();
355#if defined(__SSE2__) && !defined(__SSSE3__) && QT_COMPILER_SUPPORTS_SSSE3
356 if (BPP == QPixelLayout::BPP24 && qCpuHasFeature(SSSE3)) {
357 // With SSE2 can convertToRGB32 be vectorized, but it takes SSSE3
358 // to vectorize the deforested version below.
359 fetchPixelsBPP24_ssse3(buffer, src, index, count);
360 convertToRGB32<Format>(buffer, count, nullptr);
361 return buffer;
362 }
363#endif
364 for (int i = 0; i < count; ++i)
365 buffer[i] = convertPixelToRGB32<Format>(fetchPixel<BPP>(src, index + i));
366 return buffer;
367}
368
369template<QImage::Format Format>
370static Q_ALWAYS_INLINE QRgba64 convertPixelToRGB64(uint s)
371{
372 return QRgba64::fromArgb32(convertPixelToRGB32<Format>(s));
373}
374
375template<QImage::Format Format>
376static const QRgba64 *QT_FASTCALL convertToRGB64(QRgba64 *buffer, const uint *src, int count,
377 const QVector<QRgb> *, QDitherInfo *)
378{
379 for (int i = 0; i < count; ++i)
380 buffer[i] = convertPixelToRGB64<Format>(src[i]);
381 return buffer;
382}
383
384template<QImage::Format Format>
385static const QRgba64 *QT_FASTCALL fetchRGBToRGB64(QRgba64 *buffer, const uchar *src, int index, int count,
386 const QVector<QRgb> *, QDitherInfo *)
387{
388 for (int i = 0; i < count; ++i)
389 buffer[i] = convertPixelToRGB64<Format>(fetchPixel<bitsPerPixel<Format>()>(src, index + i));
390 return buffer;
391}
392
393template<QImage::Format Format>
394static Q_ALWAYS_INLINE uint convertPixelToARGB32PM(uint s)
395{
396 Q_CONSTEXPR uint alphaMask = ((1 << alphaWidth<Format>()) - 1);
397 Q_CONSTEXPR uint redMask = ((1 << redWidth<Format>()) - 1);
398 Q_CONSTEXPR uint greenMask = ((1 << greenWidth<Format>()) - 1);
399 Q_CONSTEXPR uint blueMask = ((1 << blueWidth<Format>()) - 1);
400
401 Q_CONSTEXPR uchar alphaLeftShift = 8 - alphaWidth<Format>();
402 Q_CONSTEXPR uchar redLeftShift = 8 - redWidth<Format>();
403 Q_CONSTEXPR uchar greenLeftShift = 8 - greenWidth<Format>();
404 Q_CONSTEXPR uchar blueLeftShift = 8 - blueWidth<Format>();
405
406 Q_CONSTEXPR uchar alphaRightShift = 2 * alphaWidth<Format>() - 8;
407 Q_CONSTEXPR uchar redRightShift = 2 * redWidth<Format>() - 8;
408 Q_CONSTEXPR uchar greenRightShift = 2 * greenWidth<Format>() - 8;
409 Q_CONSTEXPR uchar blueRightShift = 2 * blueWidth<Format>() - 8;
410
411 Q_CONSTEXPR bool mustMin = (alphaWidth<Format>() != redWidth<Format>()) ||
412 (alphaWidth<Format>() != greenWidth<Format>()) ||
413 (alphaWidth<Format>() != blueWidth<Format>());
414
415 uint alpha = (s >> alphaShift<Format>()) & alphaMask;
416 uint red = (s >> redShift<Format>()) & redMask;
417 uint green = (s >> greenShift<Format>()) & greenMask;
418 uint blue = (s >> blueShift<Format>()) & blueMask;
419
420 alpha = (alpha << alphaLeftShift) | (alpha >> alphaRightShift);
421 red = (red << redLeftShift) | (red >> redRightShift);
422 green = (green << greenLeftShift) | (green >> greenRightShift);
423 blue = (blue << blueLeftShift) | (blue >> blueRightShift);
424
425 if (mustMin) {
426 red = qMin(alpha, red);
427 green = qMin(alpha, green);
428 blue = qMin(alpha, blue);
429 }
430
431 return (alpha << 24) | (red << 16) | (green << 8) | blue;
432}
433
434template<QImage::Format Format>
435static void QT_FASTCALL convertARGBPMToARGB32PM(uint *buffer, int count, const QVector<QRgb> *)
436{
437 for (int i = 0; i < count; ++i)
438 buffer[i] = convertPixelToARGB32PM<Format>(buffer[i]);
439}
440
441template<QImage::Format Format>
442static const uint *QT_FASTCALL fetchARGBPMToARGB32PM(uint *buffer, const uchar *src, int index, int count,
443 const QVector<QRgb> *, QDitherInfo *)
444{
445 constexpr QPixelLayout::BPP BPP = bitsPerPixel<Format>();
446#if defined(__SSE2__) && !defined(__SSSE3__) && QT_COMPILER_SUPPORTS_SSSE3
447 if (BPP == QPixelLayout::BPP24 && qCpuHasFeature(SSSE3)) {
448 // With SSE2 can convertToRGB32 be vectorized, but it takes SSSE3
449 // to vectorize the deforested version below.
450 fetchPixelsBPP24_ssse3(buffer, src, index, count);
451 convertARGBPMToARGB32PM<Format>(buffer, count, nullptr);
452 return buffer;
453 }
454#endif
455 for (int i = 0; i < count; ++i)
456 buffer[i] = convertPixelToARGB32PM<Format>(fetchPixel<BPP>(src, index + i));
457 return buffer;
458}
459
460template<QImage::Format Format>
461static Q_ALWAYS_INLINE QRgba64 convertPixelToRGBA64PM(uint s)
462{
463 return QRgba64::fromArgb32(convertPixelToARGB32PM<Format>(s));
464}
465
466template<QImage::Format Format>
467static const QRgba64 *QT_FASTCALL convertARGBPMToRGBA64PM(QRgba64 *buffer, const uint *src, int count,
468 const QVector<QRgb> *, QDitherInfo *)
469{
470 for (int i = 0; i < count; ++i)
471 buffer[i] = convertPixelToRGB64<Format>(src[i]);
472 return buffer;
473}
474
475template<QImage::Format Format>
476static const QRgba64 *QT_FASTCALL fetchARGBPMToRGBA64PM(QRgba64 *buffer, const uchar *src, int index, int count,
477 const QVector<QRgb> *, QDitherInfo *)
478{
479 constexpr QPixelLayout::BPP bpp = bitsPerPixel<Format>();
480 for (int i = 0; i < count; ++i)
481 buffer[i] = convertPixelToRGBA64PM<Format>(fetchPixel<bpp>(src, index + i));
482 return buffer;
483}
484
485template<QImage::Format Format, bool fromRGB>
486static void QT_FASTCALL storeRGBFromARGB32PM(uchar *dest, const uint *src, int index, int count,
487 const QVector<QRgb> *, QDitherInfo *dither)
488{
489 Q_CONSTEXPR uchar rWidth = redWidth<Format>();
490 Q_CONSTEXPR uchar gWidth = greenWidth<Format>();
491 Q_CONSTEXPR uchar bWidth = blueWidth<Format>();
492 constexpr QPixelLayout::BPP BPP = bitsPerPixel<Format>();
493
494 // RGB32 -> RGB888 is not a precision loss.
495 if (!dither || (rWidth == 8 && gWidth == 8 && bWidth == 8)) {
496 Q_CONSTEXPR uint rMask = (1 << redWidth<Format>()) - 1;
497 Q_CONSTEXPR uint gMask = (1 << greenWidth<Format>()) - 1;
498 Q_CONSTEXPR uint bMask = (1 << blueWidth<Format>()) - 1;
499 Q_CONSTEXPR uchar rRightShift = 24 - redWidth<Format>();
500 Q_CONSTEXPR uchar gRightShift = 16 - greenWidth<Format>();
501 Q_CONSTEXPR uchar bRightShift = 8 - blueWidth<Format>();
502
503 for (int i = 0; i < count; ++i) {
504 const uint c = fromRGB ? src[i] : qUnpremultiply(src[i]);
505 const uint r = ((c >> rRightShift) & rMask) << redShift<Format>();
506 const uint g = ((c >> gRightShift) & gMask) << greenShift<Format>();
507 const uint b = ((c >> bRightShift) & bMask) << blueShift<Format>();
508 storePixel<BPP>(dest, index + i, r | g | b);
509 };
510 } else {
511 // We do ordered dither by using a rounding conversion, but instead of
512 // adding half of input precision, we add the adjusted result from the
513 // bayer matrix before narrowing.
514 // Note: Rounding conversion in itself is different from the naive
515 // conversion we do above for non-dithering.
516 const uint *bayer_line = qt_bayer_matrix[dither->y & 15];
517 for (int i = 0; i < count; ++i) {
518 const uint c = fromRGB ? src[i] : qUnpremultiply(src[i]);
519 const int d = bayer_line[(dither->x + i) & 15];
520 const int dr = d - ((d + 1) >> rWidth);
521 const int dg = d - ((d + 1) >> gWidth);
522 const int db = d - ((d + 1) >> bWidth);
523 int r = qRed(c);
524 int g = qGreen(c);
525 int b = qBlue(c);
526 r = (r + ((dr - r) >> rWidth) + 1) >> (8 - rWidth);
527 g = (g + ((dg - g) >> gWidth) + 1) >> (8 - gWidth);
528 b = (b + ((db - b) >> bWidth) + 1) >> (8 - bWidth);
529 const uint s = (r << redShift<Format>())
530 | (g << greenShift<Format>())
531 | (b << blueShift<Format>());
532 storePixel<BPP>(dest, index + i, s);
533 }
534 }
535}
536
537template<QImage::Format Format, bool fromRGB>
538static void QT_FASTCALL storeARGBPMFromARGB32PM(uchar *dest, const uint *src, int index, int count,
539 const QVector<QRgb> *, QDitherInfo *dither)
540{
541 constexpr QPixelLayout::BPP BPP = bitsPerPixel<Format>();
542 if (!dither) {
543 Q_CONSTEXPR uint aMask = (1 << alphaWidth<Format>()) - 1;
544 Q_CONSTEXPR uint rMask = (1 << redWidth<Format>()) - 1;
545 Q_CONSTEXPR uint gMask = (1 << greenWidth<Format>()) - 1;
546 Q_CONSTEXPR uint bMask = (1 << blueWidth<Format>()) - 1;
547
548 Q_CONSTEXPR uchar aRightShift = 32 - alphaWidth<Format>();
549 Q_CONSTEXPR uchar rRightShift = 24 - redWidth<Format>();
550 Q_CONSTEXPR uchar gRightShift = 16 - greenWidth<Format>();
551 Q_CONSTEXPR uchar bRightShift = 8 - blueWidth<Format>();
552
553 Q_CONSTEXPR uint aOpaque = aMask << alphaShift<Format>();
554 for (int i = 0; i < count; ++i) {
555 const uint c = src[i];
556 const uint a = fromRGB ? aOpaque : (((c >> aRightShift) & aMask) << alphaShift<Format>());
557 const uint r = ((c >> rRightShift) & rMask) << redShift<Format>();
558 const uint g = ((c >> gRightShift) & gMask) << greenShift<Format>();
559 const uint b = ((c >> bRightShift) & bMask) << blueShift<Format>();
560 storePixel<BPP>(dest, index + i, a | r | g | b);
561 };
562 } else {
563 Q_CONSTEXPR uchar aWidth = alphaWidth<Format>();
564 Q_CONSTEXPR uchar rWidth = redWidth<Format>();
565 Q_CONSTEXPR uchar gWidth = greenWidth<Format>();
566 Q_CONSTEXPR uchar bWidth = blueWidth<Format>();
567
568 const uint *bayer_line = qt_bayer_matrix[dither->y & 15];
569 for (int i = 0; i < count; ++i) {
570 const uint c = src[i];
571 const int d = bayer_line[(dither->x + i) & 15];
572 const int da = d - ((d + 1) >> aWidth);
573 const int dr = d - ((d + 1) >> rWidth);
574 const int dg = d - ((d + 1) >> gWidth);
575 const int db = d - ((d + 1) >> bWidth);
576 int a = qAlpha(c);
577 int r = qRed(c);
578 int g = qGreen(c);
579 int b = qBlue(c);
580 if (fromRGB)
581 a = (1 << aWidth) - 1;
582 else
583 a = (a + ((da - a) >> aWidth) + 1) >> (8 - aWidth);
584 r = (r + ((dr - r) >> rWidth) + 1) >> (8 - rWidth);
585 g = (g + ((dg - g) >> gWidth) + 1) >> (8 - gWidth);
586 b = (b + ((db - b) >> bWidth) + 1) >> (8 - bWidth);
587 uint s = (a << alphaShift<Format>())
588 | (r << redShift<Format>())
589 | (g << greenShift<Format>())
590 | (b << blueShift<Format>());
591 storePixel<BPP>(dest, index + i, s);
592 }
593 }
594}
595
596template<QImage::Format Format>
597static void QT_FASTCALL rbSwap(uchar *dst, const uchar *src, int count)
598{
599 Q_CONSTEXPR uchar aWidth = alphaWidth<Format>();
600 Q_CONSTEXPR uchar aShift = alphaShift<Format>();
601 Q_CONSTEXPR uchar rWidth = redWidth<Format>();
602 Q_CONSTEXPR uchar rShift = redShift<Format>();
603 Q_CONSTEXPR uchar gWidth = greenWidth<Format>();
604 Q_CONSTEXPR uchar gShift = greenShift<Format>();
605 Q_CONSTEXPR uchar bWidth = blueWidth<Format>();
606 Q_CONSTEXPR uchar bShift = blueShift<Format>();
607#ifdef Q_COMPILER_CONSTEXPR
608 Q_STATIC_ASSERT(rWidth == bWidth);
609#endif
610 Q_CONSTEXPR uint redBlueMask = (1 << rWidth) - 1;
611 Q_CONSTEXPR uint alphaGreenMask = (((1 << aWidth) - 1) << aShift)
612 | (((1 << gWidth) - 1) << gShift);
613 constexpr QPixelLayout::BPP bpp = bitsPerPixel<Format>();
614
615 for (int i = 0; i < count; ++i) {
616 const uint c = fetchPixel<bpp>(src, i);
617 const uint r = (c >> rShift) & redBlueMask;
618 const uint b = (c >> bShift) & redBlueMask;
619 const uint t = (c & alphaGreenMask)
620 | (r << bShift)
621 | (b << rShift);
622 storePixel<bpp>(dst, i, t);
623 }
624}
625
626static void QT_FASTCALL rbSwap_rgb32(uchar *d, const uchar *s, int count)
627{
628 const uint *src = reinterpret_cast<const uint *>(s);
629 uint *dest = reinterpret_cast<uint *>(d);
630 for (int i = 0; i < count; ++i) {
631 const uint c = src[i];
632 const uint ag = c & 0xff00ff00;
633 const uint rb = c & 0x00ff00ff;
634 dest[i] = ag | (rb << 16) | (rb >> 16);
635 }
636}
637
638#if Q_BYTE_ORDER == Q_LITTLE_ENDIAN
639template<>
640void QT_FASTCALL rbSwap<QImage::Format_RGBA8888>(uchar *d, const uchar *s, int count)
641{
642 return rbSwap_rgb32(d, s, count);
643}
644#else
645template<>
646void QT_FASTCALL rbSwap<QImage::Format_RGBA8888>(uchar *d, const uchar *s, int count)
647{
648 const uint *src = reinterpret_cast<const uint *>(s);
649 uint *dest = reinterpret_cast<uint *>(d);
650 for (int i = 0; i < count; ++i) {
651 const uint c = src[i];
652 const uint rb = c & 0xff00ff00;
653 const uint ga = c & 0x00ff00ff;
654 dest[i] = ga | (rb << 16) | (rb >> 16);
655 }
656}
657#endif
658
659static void QT_FASTCALL rbSwap_rgb30(uchar *d, const uchar *s, int count)
660{
661 const uint *src = reinterpret_cast<const uint *>(s);
662 uint *dest = reinterpret_cast<uint *>(d);
663 for (int i = 0; i < count; ++i)
664 dest[i] = qRgbSwapRgb30(src[i]);
665}
666
// Builds the QPixelLayout entry for a non-alpha RGB format, wiring in the
// generic templates instantiated for Format.
// NOTE(review): the meaning of the two leading bools (presumably "has alpha"
// and "premultiplied") and of the bool template argument of
// storeRGBFromARGB32PM is inferred from usage, not from the QPixelLayout
// declaration - confirm against qdrawhelper_p.h.
template<QImage::Format Format> Q_DECL_CONSTEXPR static inline QPixelLayout pixelLayoutRGB()
{
    return QPixelLayout{
        false,
        false,
        bitsPerPixel<Format>(),
        rbSwap<Format>,
        convertToRGB32<Format>,
        convertToRGB64<Format>,
        fetchRGBToRGB32<Format>,
        fetchRGBToRGB64<Format>,
        storeRGBFromARGB32PM<Format, false>,
        storeRGBFromARGB32PM<Format, true>
    };
}
682
// Builds the QPixelLayout entry for a premultiplied ARGB format, wiring in the
// generic ARGBPM templates instantiated for Format. Both leading flags are
// true here, whereas pixelLayoutRGB() sets both to false.
// NOTE(review): the bool template argument of storeARGBPMFromARGB32PM is not
// defined in this part of the file - confirm its meaning at the definition.
template<QImage::Format Format> Q_DECL_CONSTEXPR static inline QPixelLayout pixelLayoutARGBPM()
{
    return QPixelLayout{
        true,
        true,
        bitsPerPixel<Format>(),
        rbSwap<Format>,
        convertARGBPMToARGB32PM<Format>,
        convertARGBPMToRGBA64PM<Format>,
        fetchARGBPMToARGB32PM<Format>,
        fetchARGBPMToRGBA64PM<Format>,
        storeARGBPMFromARGB32PM<Format, false>,
        storeARGBPMFromARGB32PM<Format, true>
    };
}
698
699static void QT_FASTCALL convertIndexedToARGB32PM(uint *buffer, int count, const QVector<QRgb> *clut)
700{
701 for (int i = 0; i < count; ++i)
702 buffer[i] = qPremultiply(clut->at(buffer[i]));
703}
704
705template<QPixelLayout::BPP BPP>
706static const uint *QT_FASTCALL fetchIndexedToARGB32PM(uint *buffer, const uchar *src, int index, int count,
707 const QVector<QRgb> *clut, QDitherInfo *)
708{
709 for (int i = 0; i < count; ++i) {
710 const uint s = fetchPixel<BPP>(src, index + i);
711 buffer[i] = qPremultiply(clut->at(s));
712 }
713 return buffer;
714}
715
716template<QPixelLayout::BPP BPP>
717static const QRgba64 *QT_FASTCALL fetchIndexedToRGBA64PM(QRgba64 *buffer, const uchar *src, int index, int count,
718 const QVector<QRgb> *clut, QDitherInfo *)
719{
720 for (int i = 0; i < count; ++i) {
721 const uint s = fetchPixel<BPP>(src, index + i);
722 buffer[i] = QRgba64::fromArgb32(clut->at(s)).premultiplied();
723 }
724 return buffer;
725}
726
727static const QRgba64 *QT_FASTCALL convertIndexedToRGBA64PM(QRgba64 *buffer, const uint *src, int count,
728 const QVector<QRgb> *clut, QDitherInfo *)
729{
730 for (int i = 0; i < count; ++i)
731 buffer[i] = QRgba64::fromArgb32(clut->at(src[i])).premultiplied();
732 return buffer;
733}
734
// No-op in-place conversion: the buffer is left untouched. Used by layouts
// in qPixelLayouts whose pixels need no 32-bit conversion step.
static void QT_FASTCALL convertPassThrough(uint *, int, const QVector<QRgb> *)
{
}
738
739static const uint *QT_FASTCALL fetchPassThrough(uint *, const uchar *src, int index, int,
740 const QVector<QRgb> *, QDitherInfo *)
741{
742 return reinterpret_cast<const uint *>(src) + index;
743}
744
745static const QRgba64 *QT_FASTCALL fetchPassThrough64(QRgba64 *, const uchar *src, int index, int,
746 const QVector<QRgb> *, QDitherInfo *)
747{
748 return reinterpret_cast<const QRgba64 *>(src) + index;
749}
750
751static void QT_FASTCALL storePassThrough(uchar *dest, const uint *src, int index, int count,
752 const QVector<QRgb> *, QDitherInfo *)
753{
754 uint *d = reinterpret_cast<uint *>(dest) + index;
755 if (d != src)
756 memcpy(d, src, count * sizeof(uint));
757}
758
// In-place conversion of count pixels: buffer is passed to
// qt_convertARGB32ToARGB32PM() as both destination and source.
static void QT_FASTCALL convertARGB32ToARGB32PM(uint *buffer, int count, const QVector<QRgb> *)
{
    qt_convertARGB32ToARGB32PM(buffer, buffer, count);
}
763
764static const uint *QT_FASTCALL fetchARGB32ToARGB32PM(uint *buffer, const uchar *src, int index, int count,
765 const QVector<QRgb> *, QDitherInfo *)
766{
767 return qt_convertARGB32ToARGB32PM(buffer, reinterpret_cast<const uint *>(src) + index, count);
768}
769
770static void QT_FASTCALL convertRGBA8888PMToARGB32PM(uint *buffer, int count, const QVector<QRgb> *)
771{
772 for (int i = 0; i < count; ++i)
773 buffer[i] = RGBA2ARGB(buffer[i]);
774}
775
// Reads count 32-bit pixels starting at pixel position index in src, applies
// RGBA2ARGB to each of them and stores the results in buffer. Returns buffer.
static const uint *QT_FASTCALL fetchRGBA8888PMToARGB32PM(uint *buffer, const uchar *src, int index, int count,
                                                         const QVector<QRgb> *, QDitherInfo *)
{
    const uint *s = reinterpret_cast<const uint *>(src) + index;
    // The loop itself is provided by the UNALIASED_CONVERSION_LOOP macro.
    UNALIASED_CONVERSION_LOOP(buffer, s, count, RGBA2ARGB);
    return buffer;
}
783
// In-place conversion of count pixels: buffer is passed to
// qt_convertRGBA8888ToARGB32PM() as both destination and source.
static void QT_FASTCALL convertRGBA8888ToARGB32PM(uint *buffer, int count, const QVector<QRgb> *)
{
    qt_convertRGBA8888ToARGB32PM(buffer, buffer, count);
}
788
789static const uint *QT_FASTCALL fetchRGBA8888ToARGB32PM(uint *buffer, const uchar *src, int index, int count,
790 const QVector<QRgb> *, QDitherInfo *)
791{
792 return qt_convertRGBA8888ToARGB32PM(buffer, reinterpret_cast<const uint *>(src) + index, count);
793}
794
795static void QT_FASTCALL convertAlpha8ToRGB32(uint *buffer, int count, const QVector<QRgb> *)
796{
797 for (int i = 0; i < count; ++i)
798 buffer[i] = qRgba(0, 0, 0, buffer[i]);
799}
800
801static const uint *QT_FASTCALL fetchAlpha8ToRGB32(uint *buffer, const uchar *src, int index, int count,
802 const QVector<QRgb> *, QDitherInfo *)
803{
804 for (int i = 0; i < count; ++i)
805 buffer[i] = qRgba(0, 0, 0, src[index + i]);
806 return buffer;
807}
808
809static const QRgba64 *QT_FASTCALL convertAlpha8ToRGB64(QRgba64 *buffer, const uint *src, int count,
810 const QVector<QRgb> *, QDitherInfo *)
811{
812 for (int i = 0; i < count; ++i)
813 buffer[i] = QRgba64::fromRgba(0, 0, 0, src[i]);
814 return buffer;
815}
816static const QRgba64 *QT_FASTCALL fetchAlpha8ToRGB64(QRgba64 *buffer, const uchar *src, int index, int count,
817 const QVector<QRgb> *, QDitherInfo *)
818{
819 for (int i = 0; i < count; ++i)
820 buffer[i] = QRgba64::fromRgba(0, 0, 0, src[index + i]);
821 return buffer;
822}
823
824static void QT_FASTCALL convertGrayscale8ToRGB32(uint *buffer, int count, const QVector<QRgb> *)
825{
826 for (int i = 0; i < count; ++i) {
827 const uint s = buffer[i];
828 buffer[i] = qRgb(s, s, s);
829 }
830}
831
832static const uint *QT_FASTCALL fetchGrayscale8ToRGB32(uint *buffer, const uchar *src, int index, int count,
833 const QVector<QRgb> *, QDitherInfo *)
834{
835 for (int i = 0; i < count; ++i) {
836 const uint s = src[index + i];
837 buffer[i] = qRgb(s, s, s);
838 }
839 return buffer;
840}
841
842static const QRgba64 *QT_FASTCALL convertGrayscale8ToRGB64(QRgba64 *buffer, const uint *src, int count,
843 const QVector<QRgb> *, QDitherInfo *)
844{
845 for (int i = 0; i < count; ++i)
846 buffer[i] = QRgba64::fromRgba(src[i], src[i], src[i], 255);
847 return buffer;
848}
849
850static const QRgba64 *QT_FASTCALL fetchGrayscale8ToRGB64(QRgba64 *buffer, const uchar *src, int index, int count,
851 const QVector<QRgb> *, QDitherInfo *)
852{
853 for (int i = 0; i < count; ++i) {
854 const uint s = src[index + i];
855 buffer[i] = QRgba64::fromRgba(s, s, s, 255);
856 }
857 return buffer;
858}
859
860static void QT_FASTCALL convertGrayscale16ToRGB32(uint *buffer, int count, const QVector<QRgb> *)
861{
862 for (int i = 0; i < count; ++i) {
863 const uint x = qt_div_257(buffer[i]);
864 buffer[i] = qRgb(x, x, x);
865 }
866}
867
868static const uint *QT_FASTCALL fetchGrayscale16ToRGB32(uint *buffer, const uchar *src, int index, int count,
869 const QVector<QRgb> *, QDitherInfo *)
870{
871 const unsigned short *s = reinterpret_cast<const unsigned short *>(src) + index;
872 for (int i = 0; i < count; ++i) {
873 const uint x = qt_div_257(s[i]);
874 buffer[i] = qRgb(x, x, x);
875 }
876 return buffer;
877}
878
879static const QRgba64 *QT_FASTCALL convertGrayscale16ToRGBA64(QRgba64 *buffer, const uint *src, int count,
880 const QVector<QRgb> *, QDitherInfo *)
881{
882 const unsigned short *s = reinterpret_cast<const unsigned short *>(src);
883 for (int i = 0; i < count; ++i)
884 buffer[i] = QRgba64::fromRgba64(s[i], s[i], s[i], 65535);
885 return buffer;
886}
887
888static const QRgba64 *QT_FASTCALL fetchGrayscale16ToRGBA64(QRgba64 *buffer, const uchar *src, int index, int count,
889 const QVector<QRgb> *, QDitherInfo *)
890{
891 const unsigned short *s = reinterpret_cast<const unsigned short *>(src) + index;
892 for (int i = 0; i < count; ++i) {
893 buffer[i] = QRgba64::fromRgba64(s[i], s[i], s[i], 65535);
894 }
895 return buffer;
896}
897
// Stores count pixels from src at pixel position index in dest, passing each
// one through qUnpremultiply() on the way.
static void QT_FASTCALL storeARGB32FromARGB32PM(uchar *dest, const uint *src, int index, int count,
                                                const QVector<QRgb> *, QDitherInfo *)
{
    uint *d = reinterpret_cast<uint *>(dest) + index;
    // The loop itself is provided by the UNALIASED_CONVERSION_LOOP macro.
    UNALIASED_CONVERSION_LOOP(d, src, count, [](uint c) { return qUnpremultiply(c); });
}
904
// Stores count pixels from src at pixel position index in dest, passing each
// one through ARGB2RGBA on the way.
static void QT_FASTCALL storeRGBA8888PMFromARGB32PM(uchar *dest, const uint *src, int index, int count,
                                                    const QVector<QRgb> *, QDitherInfo *)
{
    uint *d = reinterpret_cast<uint *>(dest) + index;
    // The loop itself is provided by the UNALIASED_CONVERSION_LOOP macro.
    UNALIASED_CONVERSION_LOOP(d, src, count, ARGB2RGBA);
}
911
912#ifdef __SSE2__
// SSE2 conversion of count 32-bit pixels from src into 64-bit pixels in buffer.
//  - RGBA:      when true the scalar paths apply RGBA2ARGB() before widening
//               and the vector path skips the word shuffle; when false the
//               vector path swaps 16-bit words 0 and 2 of every pixel.
//  - maskAlpha: when true the top byte of each source pixel is forced to 0xff
//               before conversion.
template<bool RGBA, bool maskAlpha>
static inline void qConvertARGB32PMToRGBA64PM_sse2(QRgba64 *buffer, const uint *src, int count)
{
    if (count <= 0)
        return;

    const __m128i amask = _mm_set1_epi32(0xff000000);
    int i = 0;
    // Scalar prologue: convert single pixels until buffer is 16-byte aligned,
    // which the aligned stores in the vector loop below require.
    for (; ((uintptr_t)buffer & 0xf) && i < count; ++i) {
        uint s = *src++;
        if (maskAlpha)
            s = s | 0xff000000;
        if (RGBA)
            s = RGBA2ARGB(s);
        *buffer++ = QRgba64::fromArgb32(s);
    }
    // Vector loop: four source pixels (one unaligned 128-bit load) per iteration.
    for (; i < count-3; i += 4) {
        __m128i vs = _mm_loadu_si128((const __m128i*)src);
        if (maskAlpha)
            vs = _mm_or_si128(vs, amask);
        src += 4;
        // Interleaving the vector with itself duplicates every byte into a
        // 16-bit word (0xAB -> 0xABAB).
        __m128i v1 = _mm_unpacklo_epi8(vs, vs);
        __m128i v2 = _mm_unpackhi_epi8(vs, vs);
        if (!RGBA) {
            // _MM_SHUFFLE(3, 0, 1, 2) exchanges words 0 and 2 of each group of
            // four words and leaves words 1 and 3 in place.
            v1 = _mm_shufflelo_epi16(v1, _MM_SHUFFLE(3, 0, 1, 2));
            v2 = _mm_shufflelo_epi16(v2, _MM_SHUFFLE(3, 0, 1, 2));
            v1 = _mm_shufflehi_epi16(v1, _MM_SHUFFLE(3, 0, 1, 2));
            v2 = _mm_shufflehi_epi16(v2, _MM_SHUFFLE(3, 0, 1, 2));
        }
        _mm_store_si128((__m128i*)(buffer), v1);
        buffer += 2;
        _mm_store_si128((__m128i*)(buffer), v2);
        buffer += 2;
    }

    // Scalar tail for the pixels the vector loop did not consume; the loop
    // header comes from the SIMD_EPILOGUE macro.
    SIMD_EPILOGUE(i, count, 3) {
        uint s = *src++;
        if (maskAlpha)
            s = s | 0xff000000;
        if (RGBA)
            s = RGBA2ARGB(s);
        *buffer++ = QRgba64::fromArgb32(s);
    }
}
957
// SSE2 conversion of count QRgba64 pixels from buffer into 32-bit pixels with
// 10 bits per color channel and 2 bits of alpha, written to dest. PixelOrder
// selects which of the red/blue channels ends up in bits 20-29.
template<QtPixelOrder PixelOrder>
static inline void qConvertRGBA64PMToA2RGB30PM_sse2(uint *dest, const QRgba64 *buffer, int count)
{
    const __m128i gmask = _mm_set1_epi32(0x000ffc00);
    const __m128i cmask = _mm_set1_epi32(0x000003ff);
    int i = 0;
    __m128i vr, vg, vb, va;
    // Scalar prologue until buffer is 16-byte aligned (needed by the aligned
    // loads below).
    for (; i < count && uintptr_t(buffer) & 0xF; ++i) {
        *dest++ = qConvertRgb64ToRgb30<PixelOrder>(*buffer++);
    }

    for (; i < count-15; i += 16) {
        // Repremultiplying is really expensive and hard to do in SIMD without AVX2,
        // so we try to avoid it by checking if it is needed 16 samples at a time.
        __m128i vOr = _mm_set1_epi32(0);
        __m128i vAnd = _mm_set1_epi32(0xffffffff);
        // Accumulate the bitwise OR and AND of all 16 pixels (two per load).
        for (int j = 0; j < 16; j += 2) {
            __m128i vs = _mm_load_si128((const __m128i*)(buffer + j));
            vOr = _mm_or_si128(vOr, vs);
            vAnd = _mm_and_si128(vAnd, vs);
        }
        // Words 3 and 7 are the highest 16-bit word of each of the two pixels
        // held in a vector, i.e. their alpha components.
        const quint16 orAlpha = ((uint)_mm_extract_epi16(vOr, 3)) | ((uint)_mm_extract_epi16(vOr, 7));
        const quint16 andAlpha = ((uint)_mm_extract_epi16(vAnd, 3)) & ((uint)_mm_extract_epi16(vAnd, 7));

        if (andAlpha == 0xffff) {
            // Every alpha is 0xffff: pack two pixels per iteration with shifts
            // and masks only.
            for (int j = 0; j < 16; j += 2) {
                __m128i vs = _mm_load_si128((const __m128i*)buffer);
                buffer += 2;
                // Move the top 10 bits of each 16-bit channel into position
                // within the low 32 bits of its 64-bit pixel.
                vr = _mm_srli_epi64(vs, 6);
                vg = _mm_srli_epi64(vs, 16 + 6 - 10);
                vb = _mm_srli_epi64(vs, 32 + 6);
                vr = _mm_and_si128(vr, cmask);
                vg = _mm_and_si128(vg, gmask);
                vb = _mm_and_si128(vb, cmask);
                // Top 2 bits of alpha.
                va = _mm_srli_epi64(vs, 48 + 14);
                if (PixelOrder == PixelOrderRGB)
                    vr = _mm_slli_epi32(vr, 20);
                else
                    vb = _mm_slli_epi32(vb, 20);
                va = _mm_slli_epi32(va, 30);
                __m128i vd = _mm_or_si128(_mm_or_si128(vr, vg), _mm_or_si128(vb, va));
                // Gather 32-bit elements 0 and 2 (the packed pixels) into the
                // low 64 bits, then store just those two pixels.
                vd = _mm_shuffle_epi32(vd, _MM_SHUFFLE(3, 1, 2, 0));
                _mm_storel_epi64((__m128i*)dest, vd);
                dest += 2;
            }
        } else if (orAlpha == 0) {
            // Every alpha is zero: emit 16 zero pixels.
            for (int j = 0; j < 16; ++j) {
                *dest++ = 0;
                buffer++;
            }
        } else {
            // Mixed alpha: fall back to the scalar converter.
            for (int j = 0; j < 16; ++j)
                *dest++ = qConvertRgb64ToRgb30<PixelOrder>(*buffer++);
        }
    }

    // Scalar tail for the remaining pixels; the loop header comes from the
    // SIMD_EPILOGUE macro.
    SIMD_EPILOGUE(i, count, 15)
        *dest++ = qConvertRgb64ToRgb30<PixelOrder>(*buffer++);
}
1017#elif defined(__ARM_NEON__)
// NEON counterpart of the SSE2 32-bit to 64-bit pixel conversion.
//  - RGBA:      when true the scalar tail applies RGBA2ARGB() before widening
//               and the vector path skips the byte permutation; when false
//               the vector path swaps bytes 0 and 2 of every pixel.
//  - maskAlpha: when true the top byte of each source pixel is forced to 0xff.
template<bool RGBA, bool maskAlpha>
static inline void qConvertARGB32PMToRGBA64PM_neon(QRgba64 *buffer, const uint *src, int count)
{
    if (count <= 0)
        return;

    const uint32x4_t amask = vdupq_n_u32(0xff000000);
    // Byte permutation table that exchanges bytes 0 and 2 of every 4-byte pixel.
#if defined(Q_PROCESSOR_ARM_64)
    const uint8x16_t rgbaMask = { 2, 1, 0, 3, 6, 5, 4, 7, 10, 9, 8, 11, 14, 13, 12, 15};
#else
    const uint8x8_t rgbaMask = { 2, 1, 0, 3, 6, 5, 4, 7 };
#endif
    int i = 0;
    // Vector loop: four source pixels per iteration.
    for (; i < count-3; i += 4) {
        uint32x4_t vs32 = vld1q_u32(src);
        src += 4;
        if (maskAlpha)
            vs32 = vorrq_u32(vs32, amask);
        uint8x16_t vs8 = vreinterpretq_u8_u32(vs32);
        if (!RGBA) {
#if defined(Q_PROCESSOR_ARM_64)
            vs8 = vqtbl1q_u8(vs8, rgbaMask);
#else
            // no vqtbl1q_u8
            const uint8x8_t vlo = vtbl1_u8(vget_low_u8(vs8), rgbaMask);
            const uint8x8_t vhi = vtbl1_u8(vget_high_u8(vs8), rgbaMask);
            vs8 = vcombine_u8(vlo, vhi);
#endif
        }
        // Zipping the vector with itself duplicates every byte into a 16-bit
        // lane (0xAB -> 0xABAB).
        uint8x16x2_t v = vzipq_u8(vs8, vs8);

        vst1q_u16((uint16_t *)buffer, vreinterpretq_u16_u8(v.val[0]));
        buffer += 2;
        vst1q_u16((uint16_t *)buffer, vreinterpretq_u16_u8(v.val[1]));
        buffer += 2;
    }

    // Scalar tail for the pixels the vector loop did not consume; the loop
    // header comes from the SIMD_EPILOGUE macro.
    SIMD_EPILOGUE(i, count, 3) {
        uint s = *src++;
        if (maskAlpha)
            s = s | 0xff000000;
        if (RGBA)
            s = RGBA2ARGB(s);
        *buffer++ = QRgba64::fromArgb32(s);
    }
}
1064#endif
1065
// Converts count 32-bit pixels from src into QRgba64 pixels in buffer, forcing
// the top (alpha) byte of every source pixel to 0xff first. Uses the SSE2 or
// NEON helper when available, otherwise a plain loop. Returns buffer.
static const QRgba64 *QT_FASTCALL convertRGB32ToRGB64(QRgba64 *buffer, const uint *src, int count,
                                                      const QVector<QRgb> *, QDitherInfo *)
{
#ifdef __SSE2__
    qConvertARGB32PMToRGBA64PM_sse2<false, true>(buffer, src, count);
#elif defined(__ARM_NEON__)
    qConvertARGB32PMToRGBA64PM_neon<false, true>(buffer, src, count);
#else
    for (int i = 0; i < count; ++i)
        buffer[i] = QRgba64::fromArgb32(0xff000000 | src[i]);
#endif
    return buffer;
}
1079
1080static const QRgba64 *QT_FASTCALL fetchRGB32ToRGB64(QRgba64 *buffer, const uchar *src, int index, int count,
1081 const QVector<QRgb> *, QDitherInfo *)
1082{
1083 return convertRGB32ToRGB64(buffer, reinterpret_cast<const uint *>(src) + index, count, nullptr, nullptr);
1084}
1085
1086static const QRgba64 *QT_FASTCALL convertARGB32ToRGBA64PM(QRgba64 *buffer, const uint *src, int count,
1087 const QVector<QRgb> *, QDitherInfo *)
1088{
1089 for (int i = 0; i < count; ++i)
1090 buffer[i] = QRgba64::fromArgb32(src[i]).premultiplied();
1091 return buffer;
1092}
1093
1094static const QRgba64 *QT_FASTCALL fetchARGB32ToRGBA64PM(QRgba64 *buffer, const uchar *src, int index, int count,
1095 const QVector<QRgb> *, QDitherInfo *)
1096{
1097 return convertARGB32ToRGBA64PM(buffer, reinterpret_cast<const uint *>(src) + index, count, nullptr, nullptr);
1098}
1099
// Converts count 32-bit pixels from src into QRgba64 pixels in buffer without
// touching alpha. Uses the SSE2 or NEON helper when available, otherwise
// QRgba64::fromArgb32() per pixel. Returns buffer.
static const QRgba64 *QT_FASTCALL convertARGB32PMToRGBA64PM(QRgba64 *buffer, const uint *src, int count,
                                                            const QVector<QRgb> *, QDitherInfo *)
{
#ifdef __SSE2__
    qConvertARGB32PMToRGBA64PM_sse2<false, false>(buffer, src, count);
#elif defined(__ARM_NEON__)
    qConvertARGB32PMToRGBA64PM_neon<false, false>(buffer, src, count);
#else
    for (int i = 0; i < count; ++i)
        buffer[i] = QRgba64::fromArgb32(src[i]);
#endif
    return buffer;
}
1113
1114static const QRgba64 *QT_FASTCALL fetchARGB32PMToRGBA64PM(QRgba64 *buffer, const uchar *src, int index, int count,
1115 const QVector<QRgb> *, QDitherInfo *)
1116{
1117 return convertARGB32PMToRGBA64PM(buffer, reinterpret_cast<const uint *>(src) + index, count, nullptr, nullptr);
1118}
1119
1120#if QT_CONFIG(raster_64bit)
1121static void convertRGBA64ToRGBA64PM(QRgba64 *buffer, int count)
1122{
1123 for (int i = 0; i < count; ++i)
1124 buffer[i] = buffer[i].premultiplied();
1125}
1126
// No-op counterpart of convertRGBA64ToRGBA64PM(): the buffer is left untouched.
static void convertRGBA64PMToRGBA64PM(QRgba64 *, int)
{
}
1130#endif
1131
1132static const QRgba64 *QT_FASTCALL fetchRGBA64ToRGBA64PM(QRgba64 *buffer, const uchar *src, int index, int count,
1133 const QVector<QRgb> *, QDitherInfo *)
1134{
1135 const QRgba64 *s = reinterpret_cast<const QRgba64 *>(src) + index;
1136 for (int i = 0; i < count; ++i)
1137 buffer[i] = QRgba64::fromRgba64(s[i]).premultiplied();
1138 return buffer;
1139}
1140
1141static const QRgba64 *QT_FASTCALL convertRGBA8888ToRGBA64PM(QRgba64 *buffer, const uint *src, int count,
1142 const QVector<QRgb> *, QDitherInfo *)
1143{
1144 for (int i = 0; i < count; ++i)
1145 buffer[i] = QRgba64::fromArgb32(RGBA2ARGB(src[i])).premultiplied();
1146 return buffer;
1147}
1148
1149static const QRgba64 *QT_FASTCALL fetchRGBA8888ToRGBA64PM(QRgba64 *buffer, const uchar *src, int index, int count,
1150 const QVector<QRgb> *, QDitherInfo *)
1151{
1152 return convertRGBA8888ToRGBA64PM(buffer, reinterpret_cast<const uint *>(src) + index, count, nullptr, nullptr);
1153}
1154
// Converts count RGBA-ordered 32-bit pixels from src into QRgba64 pixels in
// buffer. Uses the SSE2 or NEON helper (RGBA = true) when available, otherwise
// RGBA2ARGB() followed by QRgba64::fromArgb32() per pixel. Returns buffer.
static const QRgba64 *QT_FASTCALL convertRGBA8888PMToRGBA64PM(QRgba64 *buffer, const uint *src, int count,
                                                              const QVector<QRgb> *, QDitherInfo *)
{
#ifdef __SSE2__
    qConvertARGB32PMToRGBA64PM_sse2<true, false>(buffer, src, count);
#elif defined(__ARM_NEON__)
    qConvertARGB32PMToRGBA64PM_neon<true, false>(buffer, src, count);
#else
    for (int i = 0; i < count; ++i)
        buffer[i] = QRgba64::fromArgb32(RGBA2ARGB(src[i]));
#endif
    return buffer;
}
1168
1169static const QRgba64 *QT_FASTCALL fetchRGBA8888PMToRGBA64PM(QRgba64 *buffer, const uchar *src, int index, int count,
1170 const QVector<QRgb> *, QDitherInfo *)
1171{
1172 return convertRGBA8888PMToRGBA64PM(buffer, reinterpret_cast<const uint *>(src) + index, count, nullptr, nullptr);
1173}
1174
// Stores count pixels from src at pixel position index in dest; each pixel is
// passed through qUnpremultiply() and then ARGB2RGBA().
static void QT_FASTCALL storeRGBA8888FromARGB32PM(uchar *dest, const uint *src, int index, int count,
                                                  const QVector<QRgb> *, QDitherInfo *)
{
    uint *d = reinterpret_cast<uint *>(dest) + index;
    // The loop itself is provided by the UNALIASED_CONVERSION_LOOP macro.
    UNALIASED_CONVERSION_LOOP(d, src, count, [](uint c) { return ARGB2RGBA(qUnpremultiply(c)); });
}
1181
// Stores count pixels from src at pixel position index in dest; each pixel has
// its top byte forced to 0xff and is then passed through ARGB2RGBA().
static void QT_FASTCALL storeRGBXFromRGB32(uchar *dest, const uint *src, int index, int count,
                                           const QVector<QRgb> *, QDitherInfo *)
{
    uint *d = reinterpret_cast<uint *>(dest) + index;
    // The loop itself is provided by the UNALIASED_CONVERSION_LOOP macro.
    UNALIASED_CONVERSION_LOOP(d, src, count, [](uint c) { return ARGB2RGBA(0xff000000 | c); });
}
1188
// Stores count pixels from src at pixel position index in dest; each pixel is
// unpremultiplied, has its top byte forced to 0xff and is then passed through
// ARGB2RGBA().
static void QT_FASTCALL storeRGBXFromARGB32PM(uchar *dest, const uint *src, int index, int count,
                                              const QVector<QRgb> *, QDitherInfo *)
{
    uint *d = reinterpret_cast<uint *>(dest) + index;
    // The loop itself is provided by the UNALIASED_CONVERSION_LOOP macro.
    UNALIASED_CONVERSION_LOOP(d, src, count, [](uint c) { return ARGB2RGBA(0xff000000 | qUnpremultiply(c)); });
}
1195
1196template<QtPixelOrder PixelOrder>
1197static void QT_FASTCALL convertA2RGB30PMToARGB32PM(uint *buffer, int count, const QVector<QRgb> *)
1198{
1199 for (int i = 0; i < count; ++i)
1200 buffer[i] = qConvertA2rgb30ToArgb32<PixelOrder>(buffer[i]);
1201}
1202
// Reads count 2:10:10:10 pixels starting at pixel position index in s and
// stores them as 8-bit-per-channel pixels in buffer. Without dither info each
// pixel goes through qConvertA2rgb30ToArgb32<PixelOrder>(); with dither info a
// per-position offset from qt_bayer_matrix is applied before the 10-to-8-bit
// reduction. Returns buffer.
template<QtPixelOrder PixelOrder>
static const uint *QT_FASTCALL fetchA2RGB30PMToARGB32PM(uint *buffer, const uchar *s, int index, int count,
                                                        const QVector<QRgb> *, QDitherInfo *dither)
{
    const uint *src = reinterpret_cast<const uint *>(s) + index;
    if (!dither) {
        UNALIASED_CONVERSION_LOOP(buffer, src, count, qConvertA2rgb30ToArgb32<PixelOrder>);
    } else {
        for (int i = 0; i < count; ++i) {
            const uint c = src[i];
            // Matrix entry for this position (16x16 tiling), scaled by 4.
            short d10 = (qt_bayer_matrix[dither->y & 15][(dither->x + i) & 15] << 2);
            // 2-bit alpha expanded to 10 bits: 3 * 0x155 == 0x3ff.
            short a10 = (c >> 30) * 0x155;
            short r10 = ((c >> 20) & 0x3ff);
            short g10 = ((c >> 10) & 0x3ff);
            short b10 = (c & 0x3ff);
            if (PixelOrder == PixelOrderBGR)
                std::swap(r10, b10);
            // Nudge each 10-bit value by the dither term, then drop 2 bits.
            short a8 = (a10 + ((d10 - a10) >> 8)) >> 2;
            short r8 = (r10 + ((d10 - r10) >> 8)) >> 2;
            short g8 = (g10 + ((d10 - g10) >> 8)) >> 2;
            short b8 = (b10 + ((d10 - b10) >> 8)) >> 2;
            buffer[i] = qRgba(r8, g8, b8, a8);
        }
    }
    return buffer;
}
1229
1230#ifdef __SSE2__
// SSE2 conversion of count 2:10:10:10 pixels from src into QRgba64 pixels in
// buffer. PixelOrder selects which of the two outer 10-bit fields (bits 20-29
// or bits 0-9) is written as the first 16-bit component.
template<QtPixelOrder PixelOrder>
static inline void qConvertA2RGB30PMToRGBA64PM_sse2(QRgba64 *buffer, const uint *src, int count)
{
    if (count <= 0)
        return;

    const __m128i rmask = _mm_set1_epi32(0x3ff00000);
    const __m128i gmask = _mm_set1_epi32(0x000ffc00);
    const __m128i bmask = _mm_set1_epi32(0x000003ff);
    // 3 * 0x5555 == 0xffff, so the multiply expands 2-bit alpha to 16 bits.
    const __m128i afactor = _mm_set1_epi16(0x5555);
    int i = 0;

    // Scalar prologue until buffer is 16-byte aligned (needed by the aligned
    // stores below).
    for (; ((uintptr_t)buffer & 0xf) && i < count; ++i)
        *buffer++ = qConvertA2rgb30ToRgb64<PixelOrder>(*src++);

    // Vector loop: four source pixels per iteration.
    for (; i < count-3; i += 4) {
        __m128i vs = _mm_loadu_si128((const __m128i*)src);
        src += 4;
        __m128i va = _mm_srli_epi32(vs, 30);
        __m128i vr = _mm_and_si128(vs, rmask);
        __m128i vb = _mm_and_si128(vs, bmask);
        __m128i vg = _mm_and_si128(vs, gmask);
        va = _mm_mullo_epi16(va, afactor);
        // Expand each 10-bit field to 16 bits: the field is placed in bits
        // 6-15 and its top 6 bits are repeated in bits 0-5.
        vr = _mm_or_si128(_mm_srli_epi32(vr, 14), _mm_srli_epi32(vr, 24));
        vg = _mm_or_si128(_mm_srli_epi32(vg, 4), _mm_srli_epi32(vg, 14));
        vb = _mm_or_si128(_mm_slli_epi32(vb, 6), _mm_srli_epi32(vb, 4));
        // Pair the components up as 16-bit halves of 32-bit lanes, then
        // interleave the pairs into the final four-component pixels.
        __m128i vrb;
        if (PixelOrder == PixelOrderRGB)
            vrb = _mm_or_si128(vr, _mm_slli_si128(vb, 2));
        else
            vrb = _mm_or_si128(vb, _mm_slli_si128(vr, 2));
        __m128i vga = _mm_or_si128(vg, _mm_slli_si128(va, 2));
        _mm_store_si128((__m128i*)(buffer), _mm_unpacklo_epi16(vrb, vga));
        buffer += 2;
        _mm_store_si128((__m128i*)(buffer), _mm_unpackhi_epi16(vrb, vga));
        buffer += 2;
    }

    // Scalar tail for the remaining pixels; the loop header comes from the
    // SIMD_EPILOGUE macro.
    SIMD_EPILOGUE(i, count, 3)
        *buffer++ = qConvertA2rgb30ToRgb64<PixelOrder>(*src++);
}
1272#endif
1273
// Converts count 2:10:10:10 pixels from src into QRgba64 pixels in buffer,
// using the SSE2 helper when available and qConvertA2rgb30ToRgb64<PixelOrder>()
// per pixel otherwise. Returns buffer.
template<QtPixelOrder PixelOrder>
static const QRgba64 *QT_FASTCALL convertA2RGB30PMToRGBA64PM(QRgba64 *buffer, const uint *src, int count,
                                                             const QVector<QRgb> *, QDitherInfo *)
{
#ifdef __SSE2__
    qConvertA2RGB30PMToRGBA64PM_sse2<PixelOrder>(buffer, src, count);
#else
    for (int i = 0; i < count; ++i)
        buffer[i] = qConvertA2rgb30ToRgb64<PixelOrder>(src[i]);
#endif
    return buffer;
}
1286
1287template<QtPixelOrder PixelOrder>
1288static const QRgba64 *QT_FASTCALL fetchA2RGB30PMToRGBA64PM(QRgba64 *buffer, const uchar *src, int index, int count,
1289 const QVector<QRgb> *, QDitherInfo *)
1290{
1291 return convertA2RGB30PMToRGBA64PM<PixelOrder>(buffer, reinterpret_cast<const uint *>(src) + index, count, nullptr, nullptr);
1292}
1293
// Stores count pixels from src at pixel position index in dest, converting
// each with qConvertArgb32ToA2rgb30<PixelOrder>().
template<QtPixelOrder PixelOrder>
static void QT_FASTCALL storeA2RGB30PMFromARGB32PM(uchar *dest, const uint *src, int index, int count,
                                                   const QVector<QRgb> *, QDitherInfo *)
{
    uint *d = reinterpret_cast<uint *>(dest) + index;
    // The loop itself is provided by the UNALIASED_CONVERSION_LOOP macro.
    UNALIASED_CONVERSION_LOOP(d, src, count, qConvertArgb32ToA2rgb30<PixelOrder>);
}
1301
// Stores count pixels from src at pixel position index in dest, converting
// each with qConvertRgb32ToRgb30<PixelOrder>().
template<QtPixelOrder PixelOrder>
static void QT_FASTCALL storeRGB30FromRGB32(uchar *dest, const uint *src, int index, int count,
                                            const QVector<QRgb> *, QDitherInfo *)
{
    uint *d = reinterpret_cast<uint *>(dest) + index;
    // The loop itself is provided by the UNALIASED_CONVERSION_LOOP macro.
    UNALIASED_CONVERSION_LOOP(d, src, count, qConvertRgb32ToRgb30<PixelOrder>);
}
1309
// Stores count pixels from src at pixel position index in dest, converting
// each with qConvertRgb32ToRgb30<PixelOrder>().
// NOTE(review): the body is identical to storeRGB30FromRGB32() and performs no
// qUnpremultiply(), unlike storeRGBXFromARGB32PM() and
// storeGrayscale8FromARGB32PM(). Confirm whether qConvertRgb32ToRgb30() is
// meant to receive premultiplied input here.
template<QtPixelOrder PixelOrder>
static void QT_FASTCALL storeRGB30FromARGB32PM(uchar *dest, const uint *src, int index, int count,
                                               const QVector<QRgb> *, QDitherInfo *)
{
    uint *d = reinterpret_cast<uint *>(dest) + index;
    // The loop itself is provided by the UNALIASED_CONVERSION_LOOP macro.
    UNALIASED_CONVERSION_LOOP(d, src, count, qConvertRgb32ToRgb30<PixelOrder>);
}
1317
// Converts count QRgba64 pixels from src into 32-bit pixels in dst. With
// RGBA == true the scalar paths apply ARGB2RGBA() to the toArgb32() result and
// the SSE2 path skips its word shuffle; with RGBA == false the SSE2 path swaps
// 16-bit words 0 and 2 of every pixel. Explicitly instantiated below for both
// values of RGBA.
template<bool RGBA>
void qt_convertRGBA64ToARGB32(uint *dst, const QRgba64 *src, int count)
{
    int i = 0;
#ifdef __SSE2__
    // Convert one pixel in scalar code if dst is not 8-byte aligned.
    if (((uintptr_t)dst & 0x7) && count > 0) {
        uint s = (*src++).toArgb32();
        if (RGBA)
            s = ARGB2RGBA(s);
        *dst++ = s;
        i++;
    }
    const __m128i vhalf = _mm_set1_epi32(0x80);
    const __m128i vzero = _mm_setzero_si128();
    // Vector loop: two pixels (128 bits of source) per iteration.
    for (; i < count-1; i += 2) {
        __m128i vs = _mm_loadu_si128((const __m128i*)src);
        src += 2;
        if (!RGBA) {
            // _MM_SHUFFLE(3, 0, 1, 2) exchanges words 0 and 2 of each pixel.
            vs = _mm_shufflelo_epi16(vs, _MM_SHUFFLE(3, 0, 1, 2));
            vs = _mm_shufflehi_epi16(vs, _MM_SHUFFLE(3, 0, 1, 2));
        }
        // Zero-extend each 16-bit component to 32 bits.
        __m128i v1 = _mm_unpacklo_epi16(vs, vzero);
        __m128i v2 = _mm_unpackhi_epi16(vs, vzero);
        // Per component: t = x + 0x80; result = (t - (t >> 8)) >> 8, a
        // rounding reduction from 16 to 8 bits.
        v1 = _mm_add_epi32(v1, vhalf);
        v2 = _mm_add_epi32(v2, vhalf);
        v1 = _mm_sub_epi32(v1, _mm_srli_epi32(v1, 8));
        v2 = _mm_sub_epi32(v2, _mm_srli_epi32(v2, 8));
        v1 = _mm_srli_epi32(v1, 8);
        v2 = _mm_srli_epi32(v2, 8);
        // Narrow 32 -> 16 -> 8 bits; the two pixels end up in the low 64 bits.
        v1 = _mm_packs_epi32(v1, v2);
        v1 = _mm_packus_epi16(v1, vzero);
        _mm_storel_epi64((__m128i*)(dst), v1);
        dst += 2;
    }
#endif
    // Scalar path: the whole range without SSE2, otherwise the leftover pixel.
    for (; i < count; i++) {
        uint s = (*src++).toArgb32();
        if (RGBA)
            s = ARGB2RGBA(s);
        *dst++ = s;
    }
}
template void qt_convertRGBA64ToARGB32<false>(uint *dst, const QRgba64 *src, int count);
template void qt_convertRGBA64ToARGB32<true>(uint *dst, const QRgba64 *src, int count);
1362
1363
1364static void QT_FASTCALL storeAlpha8FromARGB32PM(uchar *dest, const uint *src, int index, int count,
1365 const QVector<QRgb> *, QDitherInfo *)
1366{
1367 for (int i = 0; i < count; ++i)
1368 dest[index + i] = qAlpha(src[i]);
1369}
1370
1371static void QT_FASTCALL storeGrayscale8FromRGB32(uchar *dest, const uint *src, int index, int count,
1372 const QVector<QRgb> *, QDitherInfo *)
1373{
1374 for (int i = 0; i < count; ++i)
1375 dest[index + i] = qGray(src[i]);
1376}
1377
1378static void QT_FASTCALL storeGrayscale8FromARGB32PM(uchar *dest, const uint *src, int index, int count,
1379 const QVector<QRgb> *, QDitherInfo *)
1380{
1381 for (int i = 0; i < count; ++i)
1382 dest[index + i] = qGray(qUnpremultiply(src[i]));
1383}
1384
1385static void QT_FASTCALL storeGrayscale16FromRGB32(uchar *dest, const uint *src, int index, int count,
1386 const QVector<QRgb> *, QDitherInfo *)
1387{
1388 unsigned short *d = reinterpret_cast<unsigned short *>(dest) + index;
1389 for (int i = 0; i < count; ++i)
1390 d[i] = qGray(src[i]) * 257;
1391}
1392
1393static void QT_FASTCALL storeGrayscale16FromARGB32PM(uchar *dest, const uint *src, int index, int count,
1394 const QVector<QRgb> *, QDitherInfo *)
1395{
1396 unsigned short *d = reinterpret_cast<unsigned short *>(dest) + index;
1397 for (int i = 0; i < count; ++i)
1398 d[i] = qGray(qUnpremultiply(src[i])) * 257;
1399}
1400
1401static const uint *QT_FASTCALL fetchRGB64ToRGB32(uint *buffer, const uchar *src, int index, int count,
1402 const QVector<QRgb> *, QDitherInfo *)
1403{
1404 const QRgba64 *s = reinterpret_cast<const QRgba64 *>(src) + index;
1405 for (int i = 0; i < count; ++i)
1406 buffer[i] = toArgb32(s[i]);
1407 return buffer;
1408}
1409
1410static void QT_FASTCALL storeRGB64FromRGB32(uchar *dest, const uint *src, int index, int count,
1411 const QVector<QRgb> *, QDitherInfo *)
1412{
1413 QRgba64 *d = reinterpret_cast<QRgba64 *>(dest) + index;
1414 for (int i = 0; i < count; ++i)
1415 d[i] = QRgba64::fromArgb32(src[i]);
1416}
1417
1418static const uint *QT_FASTCALL fetchRGBA64ToARGB32PM(uint *buffer, const uchar *src, int index, int count,
1419 const QVector<QRgb> *, QDitherInfo *)
1420{
1421 const QRgba64 *s = reinterpret_cast<const QRgba64 *>(src) + index;
1422 for (int i = 0; i < count; ++i)
1423 buffer[i] = toArgb32(s[i].premultiplied());
1424 return buffer;
1425}
1426
1427static void QT_FASTCALL storeRGBA64FromARGB32PM(uchar *dest, const uint *src, int index, int count,
1428 const QVector<QRgb> *, QDitherInfo *)
1429{
1430 QRgba64 *d = reinterpret_cast<QRgba64 *>(dest) + index;
1431 for (int i = 0; i < count; ++i)
1432 d[i] = QRgba64::fromArgb32(src[i]).unpremultiplied();
1433}
1434
// Table of pixel layouts, one entry per QImage format; the trailing comment on
// each brace-initialized entry names the format, and the pixelLayoutRGB() /
// pixelLayoutARGBPM() entries carry it as their template argument.
//
// Every entry lists, in order: two bool flags, the bits-per-pixel enum value,
// the red/blue swap function, the in-place convert-to-32-bit function, the
// convert-to-64-bit function, the fetch-to-32-bit function, the fetch-to-64-bit
// function and two store functions. nullptr marks an operation the table does
// not provide for that format.
// NOTE(review): judging by the entries, the flags are "has alpha channel" and
// "premultiplied", and the two stores take ARGB32PM and RGB32 input
// respectively - confirm against the QPixelLayout declaration.
//
// Note:
// convertToArgb32() assumes that no color channel is less than 4 bits.
// storeRGBFromARGB32PM() assumes that no color channel is more than 8 bits.
// QImage::rgbSwapped() assumes that the red and blue color channels have the same number of bits.
QPixelLayout qPixelLayouts[QImage::NImageFormats] = {
    { false, false, QPixelLayout::BPPNone, nullptr, nullptr, nullptr, nullptr, nullptr, nullptr, nullptr }, // Format_Invalid
    { false, false, QPixelLayout::BPP1MSB, nullptr,
      convertIndexedToARGB32PM, convertIndexedToRGBA64PM,
      fetchIndexedToARGB32PM<QPixelLayout::BPP1MSB>, fetchIndexedToRGBA64PM<QPixelLayout::BPP1MSB>,
      nullptr, nullptr }, // Format_Mono
    { false, false, QPixelLayout::BPP1LSB, nullptr,
      convertIndexedToARGB32PM, convertIndexedToRGBA64PM,
      fetchIndexedToARGB32PM<QPixelLayout::BPP1LSB>, fetchIndexedToRGBA64PM<QPixelLayout::BPP1LSB>,
      nullptr, nullptr }, // Format_MonoLSB
    { false, false, QPixelLayout::BPP8, nullptr,
      convertIndexedToARGB32PM, convertIndexedToRGBA64PM,
      fetchIndexedToARGB32PM<QPixelLayout::BPP8>, fetchIndexedToRGBA64PM<QPixelLayout::BPP8>,
      nullptr, nullptr }, // Format_Indexed8
    // Technically using convertPassThrough to convert from ARGB32PM to RGB32 is wrong,
    // but everywhere this generic conversion would be wrong is currently overloaded.
    { false, false, QPixelLayout::BPP32, rbSwap_rgb32, convertPassThrough,
      convertRGB32ToRGB64, fetchPassThrough, fetchRGB32ToRGB64, storePassThrough, storePassThrough }, // Format_RGB32
    { true, false, QPixelLayout::BPP32, rbSwap_rgb32, convertARGB32ToARGB32PM,
      convertARGB32ToRGBA64PM, fetchARGB32ToARGB32PM, fetchARGB32ToRGBA64PM, storeARGB32FromARGB32PM, storePassThrough }, // Format_ARGB32
    { true, true, QPixelLayout::BPP32, rbSwap_rgb32, convertPassThrough,
      convertARGB32PMToRGBA64PM, fetchPassThrough, fetchARGB32PMToRGBA64PM, storePassThrough, storePassThrough }, // Format_ARGB32_Premultiplied
    pixelLayoutRGB<QImage::Format_RGB16>(),
    pixelLayoutARGBPM<QImage::Format_ARGB8565_Premultiplied>(),
    pixelLayoutRGB<QImage::Format_RGB666>(),
    pixelLayoutARGBPM<QImage::Format_ARGB6666_Premultiplied>(),
    pixelLayoutRGB<QImage::Format_RGB555>(),
    pixelLayoutARGBPM<QImage::Format_ARGB8555_Premultiplied>(),
    pixelLayoutRGB<QImage::Format_RGB888>(),
    pixelLayoutRGB<QImage::Format_RGB444>(),
    pixelLayoutARGBPM<QImage::Format_ARGB4444_Premultiplied>(),
    { false, false, QPixelLayout::BPP32, rbSwap<QImage::Format_RGBA8888>, convertRGBA8888PMToARGB32PM,
      convertRGBA8888PMToRGBA64PM, fetchRGBA8888PMToARGB32PM, fetchRGBA8888PMToRGBA64PM, storeRGBXFromARGB32PM, storeRGBXFromRGB32 }, // Format_RGBX8888
    { true, false, QPixelLayout::BPP32, rbSwap<QImage::Format_RGBA8888>, convertRGBA8888ToARGB32PM,
      convertRGBA8888ToRGBA64PM, fetchRGBA8888ToARGB32PM, fetchRGBA8888ToRGBA64PM, storeRGBA8888FromARGB32PM, storeRGBXFromRGB32 }, // Format_RGBA8888
    { true, true, QPixelLayout::BPP32, rbSwap<QImage::Format_RGBA8888>, convertRGBA8888PMToARGB32PM,
      convertRGBA8888PMToRGBA64PM, fetchRGBA8888PMToARGB32PM, fetchRGBA8888PMToRGBA64PM, storeRGBA8888PMFromARGB32PM, storeRGBXFromRGB32 }, // Format_RGBA8888_Premultiplied
    { false, false, QPixelLayout::BPP32, rbSwap_rgb30,
      convertA2RGB30PMToARGB32PM<PixelOrderBGR>,
      convertA2RGB30PMToRGBA64PM<PixelOrderBGR>,
      fetchA2RGB30PMToARGB32PM<PixelOrderBGR>,
      fetchA2RGB30PMToRGBA64PM<PixelOrderBGR>,
      storeRGB30FromARGB32PM<PixelOrderBGR>,
      storeRGB30FromRGB32<PixelOrderBGR>
    }, // Format_BGR30
    { true, true, QPixelLayout::BPP32, rbSwap_rgb30,
      convertA2RGB30PMToARGB32PM<PixelOrderBGR>,
      convertA2RGB30PMToRGBA64PM<PixelOrderBGR>,
      fetchA2RGB30PMToARGB32PM<PixelOrderBGR>,
      fetchA2RGB30PMToRGBA64PM<PixelOrderBGR>,
      storeA2RGB30PMFromARGB32PM<PixelOrderBGR>,
      storeRGB30FromRGB32<PixelOrderBGR>
    }, // Format_A2BGR30_Premultiplied
    { false, false, QPixelLayout::BPP32, rbSwap_rgb30,
      convertA2RGB30PMToARGB32PM<PixelOrderRGB>,
      convertA2RGB30PMToRGBA64PM<PixelOrderRGB>,
      fetchA2RGB30PMToARGB32PM<PixelOrderRGB>,
      fetchA2RGB30PMToRGBA64PM<PixelOrderRGB>,
      storeRGB30FromARGB32PM<PixelOrderRGB>,
      storeRGB30FromRGB32<PixelOrderRGB>
    }, // Format_RGB30
    { true, true, QPixelLayout::BPP32, rbSwap_rgb30,
      convertA2RGB30PMToARGB32PM<PixelOrderRGB>,
      convertA2RGB30PMToRGBA64PM<PixelOrderRGB>,
      fetchA2RGB30PMToARGB32PM<PixelOrderRGB>,
      fetchA2RGB30PMToRGBA64PM<PixelOrderRGB>,
      storeA2RGB30PMFromARGB32PM<PixelOrderRGB>,
      storeRGB30FromRGB32<PixelOrderRGB>
    }, // Format_A2RGB30_Premultiplied
    { true, true, QPixelLayout::BPP8, nullptr,
      convertAlpha8ToRGB32, convertAlpha8ToRGB64,
      fetchAlpha8ToRGB32, fetchAlpha8ToRGB64,
      storeAlpha8FromARGB32PM, nullptr }, // Format_Alpha8
    { false, false, QPixelLayout::BPP8, nullptr,
      convertGrayscale8ToRGB32, convertGrayscale8ToRGB64,
      fetchGrayscale8ToRGB32, fetchGrayscale8ToRGB64,
      storeGrayscale8FromARGB32PM, storeGrayscale8FromRGB32 }, // Format_Grayscale8
    { false, false, QPixelLayout::BPP64, nullptr,
      convertPassThrough, nullptr,
      fetchRGB64ToRGB32, fetchPassThrough64,
      storeRGB64FromRGB32, storeRGB64FromRGB32 }, // Format_RGBX64
    { true, false, QPixelLayout::BPP64, nullptr,
      convertARGB32ToARGB32PM, nullptr,
      fetchRGBA64ToARGB32PM, fetchRGBA64ToRGBA64PM,
      storeRGBA64FromARGB32PM, storeRGB64FromRGB32 }, // Format_RGBA64
    { true, true, QPixelLayout::BPP64, nullptr,
      convertPassThrough, nullptr,
      fetchRGB64ToRGB32, fetchPassThrough64,
      storeRGB64FromRGB32, storeRGB64FromRGB32 }, // Format_RGBA64_Premultiplied
    { false, false, QPixelLayout::BPP16, nullptr,
      convertGrayscale16ToRGB32, convertGrayscale16ToRGBA64,
      fetchGrayscale16ToRGB32, fetchGrayscale16ToRGBA64,
      storeGrayscale16FromARGB32PM, storeGrayscale16FromRGB32 } // Format_Grayscale16
};
1533
// Compile-time guard: the number of qPixelLayouts entries must equal
// QImage::NImageFormats, since the table is indexed directly by format value.
1534Q_STATIC_ASSERT(sizeof(qPixelLayouts) / sizeof(*qPixelLayouts) == QImage::NImageFormats);
1535
1536static void QT_FASTCALL convertFromRgb64(uint *dest, const QRgba64 *src, int length)
1537{
1538 for (int i = 0; i < length; ++i) {
1539 dest[i] = toArgb32(src[i]);
1540 }
1541}
1542
1543template<QImage::Format format>
1544static void QT_FASTCALL storeGenericFromRGBA64PM(uchar *dest, const QRgba64 *src, int index, int count,
1545 const QVector<QRgb> *clut, QDitherInfo *dither)
1546{
1547 uint buffer[BufferSize];
1548 convertFromRgb64(buffer, src, count);
1549 qPixelLayouts[format].storeFromARGB32PM(dest, buffer, index, count, clut, dither);
1550}
1551
1552static void QT_FASTCALL storeARGB32FromRGBA64PM(uchar *dest, const QRgba64 *src, int index, int count,
1553 const QVector<QRgb> *, QDitherInfo *)
1554{
1555 uint *d = (uint*)dest + index;
1556 for (int i = 0; i < count; ++i)
1557 d[i] = toArgb32(src[i].unpremultiplied());
1558}
1559
1560static void QT_FASTCALL storeRGBA8888FromRGBA64PM(uchar *dest, const QRgba64 *src, int index, int count,
1561 const QVector<QRgb> *, QDitherInfo *)
1562{
1563 uint *d = (uint*)dest + index;
1564 for (int i = 0; i < count; ++i)
1565 d[i] = toRgba8888(src[i].unpremultiplied());
1566}
1567
1568template<QtPixelOrder PixelOrder>
1569static void QT_FASTCALL storeRGB30FromRGBA64PM(uchar *dest, const QRgba64 *src, int index, int count,
1570 const QVector<QRgb> *, QDitherInfo *)
1571{
1572 uint *d = (uint*)dest + index;
1573#ifdef __SSE2__
1574 qConvertRGBA64PMToA2RGB30PM_sse2<PixelOrder>(d, src, count);
1575#else
1576 for (int i = 0; i < count; ++i)
1577 d[i] = qConvertRgb64ToRgb30<PixelOrder>(src[i]);
1578#endif
1579}
1580
1581static void QT_FASTCALL storeRGBX64FromRGBA64PM(uchar *dest, const QRgba64 *src, int index, int count,
1582 const QVector<QRgb> *, QDitherInfo *)
1583{
1584 QRgba64 *d = reinterpret_cast<QRgba64*>(dest) + index;
1585 for (int i = 0; i < count; ++i) {
1586 d[i] = src[i].unpremultiplied();
1587 d[i].setAlpha(65535);
1588 }
1589}
1590
1591static void QT_FASTCALL storeRGBA64FromRGBA64PM(uchar *dest, const QRgba64 *src, int index, int count,
1592 const QVector<QRgb> *, QDitherInfo *)
1593{
1594 QRgba64 *d = reinterpret_cast<QRgba64*>(dest) + index;
1595 for (int i = 0; i < count; ++i)
1596 d[i] = src[i].unpremultiplied();
1597}
1598
1599static void QT_FASTCALL storeRGBA64PMFromRGBA64PM(uchar *dest, const QRgba64 *src, int index, int count,
1600 const QVector<QRgb> *, QDitherInfo *)
1601{
1602 QRgba64 *d = reinterpret_cast<QRgba64*>(dest) + index;
1603 if (d != src)
1604 memcpy(d, src, count * sizeof(QRgba64));
1605}
1606
1607static void QT_FASTCALL storeGray16FromRGBA64PM(uchar *dest, const QRgba64 *src, int index, int count,
1608 const QVector<QRgb> *, QDitherInfo *)
1609{
1610 quint16 *d = reinterpret_cast<quint16*>(dest) + index;
1611 for (int i = 0; i < count; ++i) {
1612 QRgba64 s = src[i].unpremultiplied();
1613 d[i] = qGray(s.red(), s.green(), s.blue());
1614 }
1615}
1616
// Store routines taking 64-bit pixels, one slot per QImage format and indexed
// by the format value. The first four slots have no routine (nullptr).
// Slot order follows the same format order as the other per-format tables in
// this file.
1617ConvertAndStorePixelsFunc64 qStoreFromRGBA64PM[QImage::NImageFormats] = {
1618 nullptr, // Format_Invalid
1619 nullptr, // Format_Mono
1620 nullptr, // Format_MonoLSB
1621 nullptr, // Format_Indexed8
1622 storeGenericFromRGBA64PM<QImage::Format_RGB32>,
1623 storeARGB32FromRGBA64PM, // Format_ARGB32
1624 storeGenericFromRGBA64PM<QImage::Format_ARGB32_Premultiplied>,
1625 storeGenericFromRGBA64PM<QImage::Format_RGB16>,
1626 storeGenericFromRGBA64PM<QImage::Format_ARGB8565_Premultiplied>,
1627 storeGenericFromRGBA64PM<QImage::Format_RGB666>,
1628 storeGenericFromRGBA64PM<QImage::Format_ARGB6666_Premultiplied>,
1629 storeGenericFromRGBA64PM<QImage::Format_RGB555>,
1630 storeGenericFromRGBA64PM<QImage::Format_ARGB8555_Premultiplied>,
1631 storeGenericFromRGBA64PM<QImage::Format_RGB888>,
1632 storeGenericFromRGBA64PM<QImage::Format_RGB444>,
1633 storeGenericFromRGBA64PM<QImage::Format_ARGB4444_Premultiplied>,
1634 storeGenericFromRGBA64PM<QImage::Format_RGBX8888>,
1635 storeRGBA8888FromRGBA64PM, // Format_RGBA8888
1636 storeGenericFromRGBA64PM<QImage::Format_RGBA8888_Premultiplied>,
1637 storeRGB30FromRGBA64PM<PixelOrderBGR>, // Format_BGR30
1638 storeRGB30FromRGBA64PM<PixelOrderBGR>, // Format_A2BGR30_Premultiplied
1639 storeRGB30FromRGBA64PM<PixelOrderRGB>, // Format_RGB30
1640 storeRGB30FromRGBA64PM<PixelOrderRGB>, // Format_A2RGB30_Premultiplied
1641 storeGenericFromRGBA64PM<QImage::Format_Alpha8>,
1642 storeGenericFromRGBA64PM<QImage::Format_Grayscale8>,
1643 storeRGBX64FromRGBA64PM, // Format_RGBX64
1644 storeRGBA64FromRGBA64PM, // Format_RGBA64
1645 storeRGBA64PMFromRGBA64PM, // Format_RGBA64_Premultiplied
1646 storeGray16FromRGBA64PM // Format_Grayscale16
1647};
1648
1649/*
1650 Destination fetch. This is simple as we don't have to do bounds checks or
1651 transformations
1652*/
1653
1654static uint * QT_FASTCALL destFetchMono(uint *buffer, QRasterBuffer *rasterBuffer, int x, int y, int length)
1655{
1656 uchar *Q_DECL_RESTRICT data = (uchar *)rasterBuffer->scanLine(y);
1657 uint *start = buffer;
1658 const uint *end = buffer + length;
1659 while (buffer < end) {
1660 *buffer = data[x>>3] & (0x80 >> (x & 7)) ? rasterBuffer->destColor1 : rasterBuffer->destColor0;
1661 ++buffer;
1662 ++x;
1663 }
1664 return start;
1665}
1666
1667static uint * QT_FASTCALL destFetchMonoLsb(uint *buffer, QRasterBuffer *rasterBuffer, int x, int y, int length)
1668{
1669 uchar *Q_DECL_RESTRICT data = (uchar *)rasterBuffer->scanLine(y);
1670 uint *start = buffer;
1671 const uint *end = buffer + length;
1672 while (buffer < end) {
1673 *buffer = data[x>>3] & (0x1 << (x & 7)) ? rasterBuffer->destColor1 : rasterBuffer->destColor0;
1674 ++buffer;
1675 ++x;
1676 }
1677 return start;
1678}
1679
1680static uint * QT_FASTCALL destFetchARGB32P(uint *, QRasterBuffer *rasterBuffer, int x, int y, int)
1681{
1682 return (uint *)rasterBuffer->scanLine(y) + x;
1683}
1684
1685static uint * QT_FASTCALL destFetchRGB16(uint *buffer, QRasterBuffer *rasterBuffer, int x, int y, int length)
1686{
1687 const ushort *Q_DECL_RESTRICT data = (const ushort *)rasterBuffer->scanLine(y) + x;
1688 for (int i = 0; i < length; ++i)
1689 buffer[i] = qConvertRgb16To32(data[i]);
1690 return buffer;
1691}
1692
1693static uint *QT_FASTCALL destFetch(uint *buffer, QRasterBuffer *rasterBuffer, int x, int y, int length)
1694{
1695 const QPixelLayout *layout = &qPixelLayouts[rasterBuffer->format];
1696 return const_cast<uint *>(layout->fetchToARGB32PM(buffer, rasterBuffer->scanLine(y), x, length, nullptr, nullptr));
1697}
1698
1699static uint *QT_FASTCALL destFetchUndefined(uint *buffer, QRasterBuffer *, int, int, int)
1700{
1701 return buffer;
1702}
1703
1704static DestFetchProc destFetchProc[QImage::NImageFormats] =
1705{
1706 0, // Format_Invalid
1707 destFetchMono, // Format_Mono,
1708 destFetchMonoLsb, // Format_MonoLSB
1709 0, // Format_Indexed8
1710 destFetchARGB32P, // Format_RGB32
1711 destFetch, // Format_ARGB32,
1712 destFetchARGB32P, // Format_ARGB32_Premultiplied
1713 destFetchRGB16, // Format_RGB16
1714 destFetch, // Format_ARGB8565_Premultiplied
1715 destFetch, // Format_RGB666
1716 destFetch, // Format_ARGB6666_Premultiplied
1717 destFetch, // Format_RGB555
1718 destFetch, // Format_ARGB8555_Premultiplied
1719 destFetch, // Format_RGB888
1720 destFetch, // Format_RGB444
1721 destFetch, // Format_ARGB4444_Premultiplied
1722 destFetch, // Format_RGBX8888
1723 destFetch, // Format_RGBA8888
1724 destFetch, // Format_RGBA8888_Premultiplied
1725 destFetch, // Format_BGR30
1726 destFetch, // Format_A2BGR30_Premultiplied
1727 destFetch, // Format_RGB30
1728 destFetch, // Format_A2RGB30_Premultiplied
1729 destFetch, // Format_Alpha8
1730 destFetch, // Format_Grayscale8
1731 destFetch, // Format_RGBX64
1732 destFetch, // Format_RGBA64
1733 destFetch, // Format_RGBA64_Premultiplied
1734 destFetch, // Format_Grayscale16
1735};
1736
1737#if QT_CONFIG(raster_64bit)
1738static QRgba64 *QT_FASTCALL destFetch64(QRgba64 *buffer, QRasterBuffer *rasterBuffer, int x, int y, int length)
1739{
1740 const QPixelLayout *layout = &qPixelLayouts[rasterBuffer->format];
1741 return const_cast<QRgba64 *>(layout->fetchToRGBA64PM(buffer, rasterBuffer->scanLine(y), x, length, nullptr, nullptr));
1742}
1743
1744static QRgba64 * QT_FASTCALL destFetchRGB64(QRgba64 *, QRasterBuffer *rasterBuffer, int x, int y, int)
1745{
1746 return (QRgba64 *)rasterBuffer->scanLine(y) + x;
1747}
1748
1749static QRgba64 * QT_FASTCALL destFetch64Undefined(QRgba64 *buffer, QRasterBuffer *, int, int, int)
1750{
1751 return buffer;
1752}
1753
1754static DestFetchProc64 destFetchProc64[QImage::NImageFormats] =
1755{
1756 0, // Format_Invalid
1757 0, // Format_Mono,
1758 0, // Format_MonoLSB
1759 0, // Format_Indexed8
1760 destFetch64, // Format_RGB32
1761 destFetch64, // Format_ARGB32,
1762 destFetch64, // Format_ARGB32_Premultiplied
1763 destFetch64, // Format_RGB16
1764 destFetch64, // Format_ARGB8565_Premultiplied
1765 destFetch64, // Format_RGB666
1766 destFetch64, // Format_ARGB6666_Premultiplied
1767 destFetch64, // Format_RGB555
1768 destFetch64, // Format_ARGB8555_Premultiplied
1769 destFetch64, // Format_RGB888
1770 destFetch64, // Format_RGB444
1771 destFetch64, // Format_ARGB4444_Premultiplied
1772 destFetch64, // Format_RGBX8888
1773 destFetch64, // Format_RGBA8888
1774 destFetch64, // Format_RGBA8888_Premultiplied
1775 destFetch64, // Format_BGR30
1776 destFetch64, // Format_A2BGR30_Premultiplied
1777 destFetch64, // Format_RGB30
1778 destFetch64, // Format_A2RGB30_Premultiplied
1779 destFetch64, // Format_Alpha8
1780 destFetch64, // Format_Grayscale8
1781 destFetchRGB64, // Format_RGBX64
1782 destFetch64, // Format_RGBA64
1783 destFetchRGB64, // Format_RGBA64_Premultiplied
1784 destFetch64, // Format_Grayscale16
1785};
1786#endif
1787
1788/*
1789 Returns the color in the mono destination color table
1790 that is the "nearest" to /color/.
1791*/
1792static inline QRgb findNearestColor(QRgb color, QRasterBuffer *rbuf)
1793{
1794 QRgb color_0 = qPremultiply(rbuf->destColor0);
1795 QRgb color_1 = qPremultiply(rbuf->destColor1);
1796 color = qPremultiply(color);
1797
1798 int r = qRed(color);
1799 int g = qGreen(color);
1800 int b = qBlue(color);
1801 int rx, gx, bx;
1802 int dist_0, dist_1;
1803
1804 rx = r - qRed(color_0);
1805 gx = g - qGreen(color_0);
1806 bx = b - qBlue(color_0);
1807 dist_0 = rx*rx + gx*gx + bx*bx;
1808
1809 rx = r - qRed(color_1);
1810 gx = g - qGreen(color_1);
1811 bx = b - qBlue(color_1);
1812 dist_1 = rx*rx + gx*gx + bx*bx;
1813
1814 if (dist_0 < dist_1)
1815 return color_0;
1816 return color_1;
1817}
1818
1819/*
1820 Destination store.
1821*/
1822
1823static void QT_FASTCALL destStoreMono(QRasterBuffer *rasterBuffer, int x, int y, const uint *buffer, int length)
1824{
1825 uchar *Q_DECL_RESTRICT data = (uchar *)rasterBuffer->scanLine(y);
1826 if (rasterBuffer->monoDestinationWithClut) {
1827 for (int i = 0; i < length; ++i) {
1828 if (buffer[i] == rasterBuffer->destColor0) {
1829 data[x >> 3] &= ~(0x80 >> (x & 7));
1830 } else if (buffer[i] == rasterBuffer->destColor1) {
1831 data[x >> 3] |= 0x80 >> (x & 7);
1832 } else if (findNearestColor(buffer[i], rasterBuffer) == rasterBuffer->destColor0) {
1833 data[x >> 3] &= ~(0x80 >> (x & 7));
1834 } else {
1835 data[x >> 3] |= 0x80 >> (x & 7);
1836 }
1837 ++x;
1838 }
1839 } else {
1840 for (int i = 0; i < length; ++i) {
1841 if (qGray(buffer[i]) < int(qt_bayer_matrix[y & 15][x & 15]))
1842 data[x >> 3] |= 0x80 >> (x & 7);
1843 else
1844 data[x >> 3] &= ~(0x80 >> (x & 7));
1845 ++x;
1846 }
1847 }
1848}
1849
1850static void QT_FASTCALL destStoreMonoLsb(QRasterBuffer *rasterBuffer, int x, int y, const uint *buffer, int length)
1851{
1852 uchar *Q_DECL_RESTRICT data = (uchar *)rasterBuffer->scanLine(y);
1853 if (rasterBuffer->monoDestinationWithClut) {
1854 for (int i = 0; i < length; ++i) {
1855 if (buffer[i] == rasterBuffer->destColor0) {
1856 data[x >> 3] &= ~(1 << (x & 7));
1857 } else if (buffer[i] == rasterBuffer->destColor1) {
1858 data[x >> 3] |= 1 << (x & 7);
1859 } else if (findNearestColor(buffer[i], rasterBuffer) == rasterBuffer->destColor0) {
1860 data[x >> 3] &= ~(1 << (x & 7));
1861 } else {
1862 data[x >> 3] |= 1 << (x & 7);
1863 }
1864 ++x;
1865 }
1866 } else {
1867 for (int i = 0; i < length; ++i) {
1868 if (qGray(buffer[i]) < int(qt_bayer_matrix[y & 15][x & 15]))
1869 data[x >> 3] |= 1 << (x & 7);
1870 else
1871 data[x >> 3] &= ~(1 << (x & 7));
1872 ++x;
1873 }
1874 }
1875}
1876
1877static void QT_FASTCALL destStoreRGB16(QRasterBuffer *rasterBuffer, int x, int y, const uint *buffer, int length)
1878{
1879 quint16 *data = (quint16*)rasterBuffer->scanLine(y) + x;
1880 for (int i = 0; i < length; ++i)
1881 data[i] = qConvertRgb32To16(buffer[i]);
1882}
1883
1884static void QT_FASTCALL destStore(QRasterBuffer *rasterBuffer, int x, int y, const uint *buffer, int length)
1885{
1886 const QPixelLayout *layout = &qPixelLayouts[rasterBuffer->format];
1887 ConvertAndStorePixelsFunc store = layout->storeFromARGB32PM;
1888 if (!layout->premultiplied && !layout->hasAlphaChannel)
1889 store = layout->storeFromRGB32;
1890 uchar *dest = rasterBuffer->scanLine(y);
1891 store(dest, buffer, x, length, nullptr, nullptr);
1892}
1893
1894static DestStoreProc destStoreProc[QImage::NImageFormats] =
1895{
1896 0, // Format_Invalid
1897 destStoreMono, // Format_Mono,
1898 destStoreMonoLsb, // Format_MonoLSB
1899 0, // Format_Indexed8
1900 0, // Format_RGB32
1901 destStore, // Format_ARGB32,
1902 0, // Format_ARGB32_Premultiplied
1903 destStoreRGB16, // Format_RGB16
1904 destStore, // Format_ARGB8565_Premultiplied
1905 destStore, // Format_RGB666
1906 destStore, // Format_ARGB6666_Premultiplied
1907 destStore, // Format_RGB555
1908 destStore, // Format_ARGB8555_Premultiplied
1909 destStore, // Format_RGB888
1910 destStore, // Format_RGB444
1911 destStore, // Format_ARGB4444_Premultiplied
1912 destStore, // Format_RGBX8888
1913 destStore, // Format_RGBA8888
1914 destStore, // Format_RGBA8888_Premultiplied
1915 destStore, // Format_BGR30
1916 destStore, // Format_A2BGR30_Premultiplied
1917 destStore, // Format_RGB30
1918 destStore, // Format_A2RGB30_Premultiplied
1919 destStore, // Format_Alpha8
1920 destStore, // Format_Grayscale8
1921 destStore, // Format_RGBX64
1922 destStore, // Format_RGBA64
1923 destStore, // Format_RGBA64_Premultiplied
1924 destStore, // Format_Grayscale16
1925};
1926
1927#if QT_CONFIG(raster_64bit)
1928static void QT_FASTCALL destStore64(QRasterBuffer *rasterBuffer, int x, int y, const QRgba64 *buffer, int length)
1929{
1930 auto store = qStoreFromRGBA64PM[rasterBuffer->format];
1931 uchar *dest = rasterBuffer->scanLine(y);
1932 store(dest, buffer, x, length, nullptr, nullptr);
1933}
1934
1935static void QT_FASTCALL destStore64RGBA64(QRasterBuffer *rasterBuffer, int x, int y, const QRgba64 *buffer, int length)
1936{
1937 QRgba64 *dest = reinterpret_cast<QRgba64*>(rasterBuffer->scanLine(y)) + x;
1938 for (int i = 0; i < length; ++i) {
1939 dest[i] = buffer[i].unpremultiplied();
1940 }
1941}
1942
1943static DestStoreProc64 destStoreProc64[QImage::NImageFormats] =
1944{
1945 0, // Format_Invalid
1946 0, // Format_Mono,
1947 0, // Format_MonoLSB
1948 0, // Format_Indexed8
1949 destStore64, // Format_RGB32
1950 destStore64, // Format_ARGB32,
1951 destStore64, // Format_ARGB32_Premultiplied
1952 destStore64, // Format_RGB16
1953 destStore64, // Format_ARGB8565_Premultiplied
1954 destStore64, // Format_RGB666
1955 destStore64, // Format_ARGB6666_Premultiplied
1956 destStore64, // Format_RGB555
1957 destStore64, // Format_ARGB8555_Premultiplied
1958 destStore64, // Format_RGB888
1959 destStore64, // Format_RGB444
1960 destStore64, // Format_ARGB4444_Premultiplied
1961 destStore64, // Format_RGBX8888
1962 destStore64, // Format_RGBA8888
1963 destStore64, // Format_RGBA8888_Premultiplied
1964 destStore64, // Format_BGR30
1965 destStore64, // Format_A2BGR30_Premultiplied
1966 destStore64, // Format_RGB30
1967 destStore64, // Format_A2RGB30_Premultiplied
1968 destStore64, // Format_Alpha8
1969 destStore64, // Format_Grayscale8
1970 0, // Format_RGBX64
1971 destStore64RGBA64, // Format_RGBA64
1972 0, // Format_RGBA64_Premultiplied
1973 destStore64, // Format_Grayscale16
1974};
1975#endif
1976
1977/*
1978 Source fetches
1979
1980 This is a bit more complicated, as we need several fetch routines for every surface type
1981
1982 We need 6 fetch methods per surface type (one per TextureBlendType):
1983 untransformed (tiled and not tiled)
1984 transformed (tiled and not tiled)
1985 transformed bilinear (tiled and not tiled)
1986
1987 We don't need bounds checks for untransformed, but we need them for the other ones.
1988
1989 The generic implementation does pixel by pixel fetches
1990*/
1991
// Identifies how a texture is sampled when it is used as a span source.
// The values are also used as template arguments by the fetch helpers below.
1992enum TextureBlendType {
1993 BlendUntransformed,
1994 BlendTiled,
 // Out-of-range coordinates are clamped (see fetchTransformed_pixelBounds).
1995 BlendTransformed,
 // Out-of-range coordinates wrap around modulo the texture size.
1996 BlendTransformedTiled,
 // NOTE(review): presumably the interpolating counterparts of the two
 // transformed modes above; confirm against the bilinear fetchers.
1997 BlendTransformedBilinear,
1998 BlendTransformedBilinearTiled,
 // Number of blend types; must stay last.
1999 NBlendTypes
2000};
2001
2002static const uint *QT_FASTCALL fetchUntransformed(uint *buffer, const Operator *,
2003 const QSpanData *data, int y, int x, int length)
2004{
2005 const QPixelLayout *layout = &qPixelLayouts[data->texture.format];
2006 return layout->fetchToARGB32PM(buffer, data->texture.scanLine(y), x, length, data->texture.colorTable, nullptr);
2007}
2008
2009static const uint *QT_FASTCALL fetchUntransformedARGB32PM(uint *, const Operator *,
2010 const QSpanData *data, int y, int x, int)
2011{
2012 const uchar *scanLine = data->texture.scanLine(y);
2013 return reinterpret_cast<const uint *>(scanLine) + x;
2014}
2015
2016static const uint *QT_FASTCALL fetchUntransformedRGB16(uint *buffer, const Operator *,
2017 const QSpanData *data, int y, int x,
2018 int length)
2019{
2020 const quint16 *scanLine = (const quint16 *)data->texture.scanLine(y) + x;
2021 for (int i = 0; i < length; ++i)
2022 buffer[i] = qConvertRgb16To32(scanLine[i]);
2023 return buffer;
2024}
2025
2026#if QT_CONFIG(raster_64bit)
2027static const QRgba64 *QT_FASTCALL fetchUntransformed64(QRgba64 *buffer, const Operator *,
2028 const QSpanData *data, int y, int x, int length)
2029{
2030 const QPixelLayout *layout = &qPixelLayouts[data->texture.format];
2031 return layout->fetchToRGBA64PM(buffer, data->texture.scanLine(y), x, length, data->texture.colorTable, nullptr);
2032}
2033
2034static const QRgba64 *QT_FASTCALL fetchUntransformedRGBA64PM(QRgba64 *, const Operator *,
2035 const QSpanData *data, int y, int x, int)
2036{
2037 const uchar *scanLine = data->texture.scanLine(y);
2038 return reinterpret_cast<const QRgba64 *>(scanLine) + x;
2039}
2040#endif
2041
2042template<TextureBlendType blendType>
2043inline void fetchTransformed_pixelBounds(int max, int l1, int l2, int &v)
2044{
2045 Q_STATIC_ASSERT(blendType == BlendTransformed || blendType == BlendTransformedTiled);
2046 if (blendType == BlendTransformedTiled) {
2047 if (v < 0 || v >= max) {
2048 v %= max;
2049 if (v < 0) v += max;
2050 }
2051 } else {
2052 v = qBound(l1, v, l2);
2053 }
2054}
2055
2056static inline bool canUseFastMatrixPath(const qreal cx, const qreal cy, const qsizetype length, const QSpanData *data)
2057{
2058 if (Q_UNLIKELY(!data->fast_matrix))
2059 return false;
2060
2061 qreal fx = (data->m21 * cy + data->m11 * cx + data->dx) * fixed_scale;
2062 qreal fy = (data->m22 * cy + data->m12 * cx + data->dy) * fixed_scale;
2063 qreal minc = std::min(fx, fy);
2064 qreal maxc = std::max(fx, fy);
2065 fx += std::trunc(data->m11 * fixed_scale) * length;
2066 fy += std::trunc(data->m12 * fixed_scale) * length;
2067 minc = std::min(minc, std::min(fx, fy));
2068 maxc = std::max(maxc, std::max(fx, fy));
2069
2070 return minc >= std::numeric_limits<int>::min() && maxc <= std::numeric_limits<int>::max();
2071}
2072
// Fills \a buffer with \a length texture pixels sampled along the transformed
// scanline that starts at destination position (\a x, \a y). One source pixel
// is picked per destination pixel; no interpolation happens here.
//
// Pixels are stored as fetched, i.e. still in the texture's own format; the
// callers (fetchTransformed, fetchTransformed64) convert them afterwards.
//
// Template parameters:
//   blendType - BlendTransformed (clamp coordinates) or BlendTransformedTiled
//               (wrap coordinates).
//   bpp       - bits-per-pixel class of the texture, or BPPNone to look the
//               pixel fetcher up at run time from the layout.
//   T         - element type of \a buffer (uint or QRgba64 in this file).
2073template<TextureBlendType blendType, QPixelLayout::BPP bpp, typename T>
2074static void QT_FASTCALL fetchTransformed_fetcher(T *buffer, const QSpanData *data,
2075 int y, int x, int length)
2076{
2077 Q_STATIC_ASSERT(blendType == BlendTransformed || blendType == BlendTransformedTiled);
2078 const QTextureData &image = data->texture;
2079
 // Sample at the centre of each destination pixel.
2080 const qreal cx = x + qreal(0.5);
2081 const qreal cy = y + qreal(0.5);
2082
 // Pixels narrower than 32 bits cannot be read by indexing a T array, so
 // they go through a per-pixel fetch function instead.
2083 constexpr bool useFetch = (bpp < QPixelLayout::BPP32) && sizeof(T) == sizeof(uint);
2084 const QPixelLayout *layout = &qPixelLayouts[data->texture.format];
2085 if (!useFetch)
2086 Q_ASSERT(layout->bpp == bpp);
2087 // When templated 'fetch' should be inlined at compile time:
2088 const FetchPixelFunc fetch = (bpp == QPixelLayout::BPPNone) ? qFetchPixel[layout->bpp] : FetchPixelFunc(fetchPixel<bpp>);
2089
 // Fixed-point path: source coordinates are kept as ints scaled by
 // fixed_scale, and "fx >> 16" recovers the integer pixel index
 // (fixed_scale is presumably 1 << 16; confirm at its definition).
2090 if (canUseFastMatrixPath(cx, cy, length, data)) {
2091 // The increment per x in the scanline
2092 int fdx = (int)(data->m11 * fixed_scale);
2093 int fdy = (int)(data->m12 * fixed_scale);
2094
2095 int fx = int((data->m21 * cy
2096 + data->m11 * cx + data->dx) * fixed_scale);
2097 int fy = int((data->m22 * cy
2098 + data->m12 * cx + data->dy) * fixed_scale);
2099
2100 if (fdy == 0) { // simple scale, no rotation or shear
 // The source row never changes, so resolve it once.
2101 int py = (fy >> 16);
2102 fetchTransformed_pixelBounds<blendType>(image.height, image.y1, image.y2 - 1, py);
2103 const uchar *src = image.scanLine(py);
2104
2105 int i = 0;
2106 if (blendType == BlendTransformed) {
 // fastLen: how many steps can be taken before fx passes the
 // edge it is moving towards.
2107 int fastLen = length;
2108 if (fdx > 0)
2109 fastLen = qMin(fastLen, int((qint64(image.x2 - 1) * fixed_scale - fx) / fdx));
2110 else if (fdx < 0)
2111 fastLen = qMin(fastLen, int((qint64(image.x1) * fixed_scale - fx) / fdx));
2112
 // Lead-in: fetch clamped pixels until the first sample that
 // clamping leaves unchanged, i.e. that is already in range.
2113 for (; i < fastLen; ++i) {
2114 int x1 = (fx >> 16);
2115 int x2 = x1;
2116 fetchTransformed_pixelBounds<blendType>(image.width, image.x1, image.x2 - 1, x1);
2117 if (x1 == x2)
2118 break;
2119 if (useFetch)
2120 buffer[i] = fetch(src, x1);
2121 else
2122 buffer[i] = reinterpret_cast<const T*>(src)[x1];
2123 fx += fdx;
2124 }
2125
 // Middle run: no bounds handling is applied up to fastLen.
2126 for (; i < fastLen; ++i) {
2127 int px = (fx >> 16);
2128 if (useFetch)
2129 buffer[i] = fetch(src, px);
2130 else
2131 buffer[i] = reinterpret_cast<const T*>(src)[px];
2132 fx += fdx;
2133 }
2134 }
2135
 // Remainder (or the whole span when tiling): bound every sample.
2136 for (; i < length; ++i) {
2137 int px = (fx >> 16);
2138 fetchTransformed_pixelBounds<blendType>(image.width, image.x1, image.x2 - 1, px);
2139 if (useFetch)
2140 buffer[i] = fetch(src, px);
2141 else
2142 buffer[i] = reinterpret_cast<const T*>(src)[px];
2143 fx += fdx;
2144 }
2145 } else { // rotation or shear
 // Same three-stage scheme as above, but both coordinates move, so
 // fastLen is limited by the x and the y edge.
2146 int i = 0;
2147 if (blendType == BlendTransformed) {
2148 int fastLen = length;
2149 if (fdx > 0)
2150 fastLen = qMin(fastLen, int((qint64(image.x2 - 1) * fixed_scale - fx) / fdx));
2151 else if (fdx < 0)
2152 fastLen = qMin(fastLen, int((qint64(image.x1) * fixed_scale - fx) / fdx));
2153 if (fdy > 0)
2154 fastLen = qMin(fastLen, int((qint64(image.y2 - 1) * fixed_scale - fy) / fdy));
2155 else if (fdy < 0)
2156 fastLen = qMin(fastLen, int((qint64(image.y1) * fixed_scale - fy) / fdy));
2157
 // Lead-in: clamped fetches until both coordinates are in range.
2158 for (; i < fastLen; ++i) {
2159 int x1 = (fx >> 16);
2160 int y1 = (fy >> 16);
2161 int x2 = x1;
2162 int y2 = y1;
2163 fetchTransformed_pixelBounds<blendType>(image.width, image.x1, image.x2 - 1, x1);
2164 fetchTransformed_pixelBounds<blendType>(image.height, image.y1, image.y2 - 1, y1);
2165 if (x1 == x2 && y1 == y2)
2166 break;
2167 if (useFetch)
2168 buffer[i] = fetch(image.scanLine(y1), x1);
2169 else
2170 buffer[i] = reinterpret_cast<const T*>(image.scanLine(y1))[x1];
2171 fx += fdx;
2172 fy += fdy;
2173 }
2174
 // Middle run: no bounds handling is applied up to fastLen.
2175 for (; i < fastLen; ++i) {
2176 int px = (fx >> 16);
2177 int py = (fy >> 16);
2178 if (useFetch)
2179 buffer[i] = fetch(image.scanLine(py), px);
2180 else
2181 buffer[i] = reinterpret_cast<const T*>(image.scanLine(py))[px];
2182 fx += fdx;
2183 fy += fdy;
2184 }
2185 }
2186
 // Remainder (or the whole span when tiling): bound every sample.
2187 for (; i < length; ++i) {
2188 int px = (fx >> 16);
2189 int py = (fy >> 16);
2190 fetchTransformed_pixelBounds<blendType>(image.width, image.x1, image.x2 - 1, px);
2191 fetchTransformed_pixelBounds<blendType>(image.height, image.y1, image.y2 - 1, py);
2192 if (useFetch)
2193 buffer[i] = fetch(image.scanLine(py), px);
2194 else
2195 buffer[i] = reinterpret_cast<const T*>(image.scanLine(py))[px];
2196 fx += fdx;
2197 fy += fdy;
2198 }
2199 }
2200 } else {
 // General path: floating-point coordinates with a per-pixel divide by
 // the third coordinate fw.
2201 const qreal fdx = data->m11;
2202 const qreal fdy = data->m12;
2203 const qreal fdw = data->m13;
2204
2205 qreal fx = data->m21 * cy + data->m11 * cx + data->dx;
2206 qreal fy = data->m22 * cy + data->m12 * cx + data->dy;
2207 qreal fw = data->m23 * cy + data->m13 * cx + data->m33;
2208
2209 T *const end = buffer + length;
2210 T *b = buffer;
2211 while (b < end) {
 // A zero fw would divide by zero; fall back to a factor of 1.
2212 const qreal iw = fw == 0 ? 1 : 1 / fw;
2213 const qreal tx = fx * iw;
2214 const qreal ty = fy * iw;
2215 int px = qFloor(tx);
2216 int py = qFloor(ty);
2217
2218 fetchTransformed_pixelBounds<blendType>(image.height, image.y1, image.y2 - 1, py);
2219 fetchTransformed_pixelBounds<blendType>(image.width, image.x1, image.x2 - 1, px);
2220 if (useFetch)
2221 *b = fetch(image.scanLine(py), px);
2222 else
2223 *b = reinterpret_cast<const T*>(image.scanLine(py))[px];
2224
2225 fx += fdx;
2226 fy += fdy;
2227 fw += fdw;
2228 //force increment to avoid /0
2229 if (!fw) {
2230 fw += fdw;
2231 }
2232 ++b;
2233 }
2234 }
2235}
2236
2237template<TextureBlendType blendType, QPixelLayout::BPP bpp>
2238static const uint *QT_FASTCALL fetchTransformed(uint *buffer, const Operator *, const QSpanData *data,
2239 int y, int x, int length)
2240{
2241 Q_STATIC_ASSERT(blendType == BlendTransformed || blendType == BlendTransformedTiled);
2242 const QPixelLayout *layout = &qPixelLayouts[data->texture.format];
2243 fetchTransformed_fetcher<blendType, bpp, uint>(buffer, data, y, x, length);
2244 layout->convertToARGB32PM(buffer, length, data->texture.colorTable);
2245 return buffer;
2246}
2247
2248#if QT_CONFIG(raster_64bit)
2249template<TextureBlendType blendType> /* either BlendTransformed or BlendTransformedTiled */
2250static const QRgba64 *QT_FASTCALL fetchTransformed64(QRgba64 *buffer, const Operator *, const QSpanData *data,
2251 int y, int x, int length)
2252{
2253 const QPixelLayout *layout = &qPixelLayouts[data->texture.format];
2254 if (layout->bpp != QPixelLayout::BPP64) {
2255 uint buffer32[BufferSize];
2256 Q_ASSERT(length <= BufferSize);
2257 if (layout->bpp == QPixelLayout::BPP32)
2258 fetchTransformed_fetcher<blendType, QPixelLayout::BPP32, uint>(buffer32, data, y, x, length);
2259 else
2260 fetchTransformed_fetcher<blendType, QPixelLayout::BPPNone, uint>(buffer32, data, y, x, length);
2261 return layout->convertToRGBA64PM(buffer, buffer32, length, data->texture.colorTable, nullptr);
2262 }
2263
2264 fetchTransformed_fetcher<blendType, QPixelLayout::BPP64, QRgba64>(buffer, data, y, x, length);
2265 if (data->texture.format == QImage::Format_RGBA64)
2266 convertRGBA64ToRGBA64PM(buffer, length);
2267 return buffer;
2268}
2269#endif
2270
/** \internal
  Blends the four argb pixels \a tl, \a tr, \a bl and \a br using the
  horizontal factor \a distx and the vertical factor \a disty.
  distx and disty must be between 0 and 16; the four weights derived from
  them always add up to 256.
 */
static inline uint interpolate_4_pixels_16(uint tl, uint tr, uint bl, uint br, uint distx, uint disty)
{
    const uint rbMask = 0x00ff00ff; // two of the four channels, one per 16-bit half
    const uint agMask = 0xff00ff00; // the other two channels

    const uint distxy = distx * disty;
    // With idistx = 16 - distx and idisty = 16 - disty:
    //   idistx * idisty = 16*16 - 16*distx - 16*disty + distxy
    //   distx  * idisty = 16*distx - distxy
    //   idistx * disty  = 16*disty - distxy
    const uint weightTL = 16*16 - 16*distx - 16*disty + distxy;
    const uint weightTR = distx*16 - distxy;
    const uint weightBL = disty*16 - distxy;
    const uint weightBR = distxy;

    const uint sumRB = (tl & rbMask) * weightTL
                     + (tr & rbMask) * weightTR
                     + (bl & rbMask) * weightBL
                     + (br & rbMask) * weightBR;
    const uint sumAG = ((tl & agMask) >> 8) * weightTL
                     + ((tr & agMask) >> 8) * weightTR
                     + ((bl & agMask) >> 8) * weightBL
                     + ((br & agMask) >> 8) * weightBR;

    return ((sumRB >> 8) & rbMask) | (sumAG & agMask);
}
2290
2291#if defined(__SSE2__)
// SSE2 counterpart of interpolate_4_pixels_16(): blends the pixels packed in
// the __m128i values tl, tr, bl and br and stores the 128-bit result to b with
// an unaligned store.
//
// distx and disty hold the blend factors as 16-bit lanes. The four weights are
// built from them exactly as in the scalar version (dxdy, 16*distx - dxdy,
// 16*disty - dxdy, and v_256 - 16*distx - 16*disty + dxdy), and all products
// are kept in 16-bit lanes.
// NOTE(review): colorMask is presumably 0x00ff00ff per 32-bit pixel and v_256
// presumably 256 in every 16-bit lane; confirm at the call sites.
2292#define interpolate_4_pixels_16_sse2(tl, tr, bl, br, distx, disty, colorMask, v_256, b) \
2293{ \
2294 const __m128i dxdy = _mm_mullo_epi16 (distx, disty); \
2295 const __m128i distx_ = _mm_slli_epi16(distx, 4); \
2296 const __m128i disty_ = _mm_slli_epi16(disty, 4); \
2297 const __m128i idxidy = _mm_add_epi16(dxdy, _mm_sub_epi16(v_256, _mm_add_epi16(distx_, disty_))); \
2298 const __m128i dxidy = _mm_sub_epi16(distx_, dxdy); \
2299 const __m128i idxdy = _mm_sub_epi16(disty_, dxdy); \
2300 \
2301 __m128i tlAG = _mm_srli_epi16(tl, 8); \
2302 __m128i tlRB = _mm_and_si128(tl, colorMask); \
2303 __m128i trAG = _mm_srli_epi16(tr, 8); \
2304 __m128i trRB = _mm_and_si128(tr, colorMask); \
2305 __m128i blAG = _mm_srli_epi16(bl, 8); \
2306 __m128i blRB = _mm_and_si128(bl, colorMask); \
2307 __m128i brAG = _mm_srli_epi16(br, 8); \
2308 __m128i brRB = _mm_and_si128(br, colorMask); \
2309 \
2310 tlAG = _mm_mullo_epi16(tlAG, idxidy); \
2311 tlRB = _mm_mullo_epi16(tlRB, idxidy); \
2312 trAG = _mm_mullo_epi16(trAG, dxidy); \
2313 trRB = _mm_mullo_epi16(trRB, dxidy); \
2314 blAG = _mm_mullo_epi16(blAG, idxdy); \
2315 blRB = _mm_mullo_epi16(blRB, idxdy); \
2316 brAG = _mm_mullo_epi16(brAG, dxdy); \
2317 brRB = _mm_mullo_epi16(brRB, dxdy); \
2318 \
2319 /* Add the values, and shift to only keep 8 significant bits per colors */ \
2320 __m128i rAG =_mm_add_epi16(_mm_add_epi16(tlAG, trAG), _mm_add_epi16(blAG, brAG)); \
2321 __m128i rRB =_mm_add_epi16(_mm_add_epi16(tlRB, trRB), _mm_add_epi16(blRB, brRB)); \
2322 rAG = _mm_andnot_si128(colorMask, rAG); \
2323 rRB = _mm_srli_epi16(rRB, 8); \
2324 _mm_storeu_si128((__m128i*)(b), _mm_or_si128(rAG, rRB)); \
2325}
2326#endif
2327
2328#if defined(__ARM_NEON__)
// NEON counterpart of interpolate_4_pixels_16(): blends the pixels packed in
// the int16x8_t values tl, tr, bl and br and stores the result to b.
//
// distx and disty hold the blend factors as 16-bit lanes. distx is shifted
// left by 4 here, whereas the shifted vertical factor is passed in as disty_.
// The weighted sum is accumulated with multiply-accumulate in 16-bit lanes.
// NOTE(review): disty_ is presumably disty << 4, colorMask presumably selects
// the low byte of every 16-bit lane with invColorMask as its complement, and
// v_256 is presumably 256 per lane; confirm at the call sites.
2329#define interpolate_4_pixels_16_neon(tl, tr, bl, br, distx, disty, disty_, colorMask, invColorMask, v_256, b) \
2330{ \
2331 const int16x8_t dxdy = vmulq_s16(distx, disty); \
2332 const int16x8_t distx_ = vshlq_n_s16(distx, 4); \
2333 const int16x8_t idxidy = vaddq_s16(dxdy, vsubq_s16(v_256, vaddq_s16(distx_, disty_))); \
2334 const int16x8_t dxidy = vsubq_s16(distx_, dxdy); \
2335 const int16x8_t idxdy = vsubq_s16(disty_, dxdy); \
2336 \
2337 int16x8_t tlAG = vreinterpretq_s16_u16(vshrq_n_u16(vreinterpretq_u16_s16(tl), 8)); \
2338 int16x8_t tlRB = vandq_s16(tl, colorMask); \
2339 int16x8_t trAG = vreinterpretq_s16_u16(vshrq_n_u16(vreinterpretq_u16_s16(tr), 8)); \
2340 int16x8_t trRB = vandq_s16(tr, colorMask); \
2341 int16x8_t blAG = vreinterpretq_s16_u16(vshrq_n_u16(vreinterpretq_u16_s16(bl), 8)); \
2342 int16x8_t blRB = vandq_s16(bl, colorMask); \
2343 int16x8_t brAG = vreinterpretq_s16_u16(vshrq_n_u16(vreinterpretq_u16_s16(br), 8)); \
2344 int16x8_t brRB = vandq_s16(br, colorMask); \
2345 \
2346 int16x8_t rAG = vmulq_s16(tlAG, idxidy); \
2347 int16x8_t rRB = vmulq_s16(tlRB, idxidy); \
2348 rAG = vmlaq_s16(rAG, trAG, dxidy); \
2349 rRB = vmlaq_s16(rRB, trRB, dxidy); \
2350 rAG = vmlaq_s16(rAG, blAG, idxdy); \
2351 rRB = vmlaq_s16(rRB, blRB, idxdy); \
2352 rAG = vmlaq_s16(rAG, brAG, dxdy); \
2353 rRB = vmlaq_s16(rRB, brRB, dxdy); \
2354 \
2355 rAG = vandq_s16(invColorMask, rAG); \
2356 rRB = vreinterpretq_s16_u16(vshrq_n_u16(vreinterpretq_u16_s16(rRB), 8)); \
2357 vst1q_s16((int16_t*)(b), vorrq_s16(rAG, rRB)); \
2358}
2359#endif
2360
2361template<TextureBlendType blendType>
2362void fetchTransformedBilinear_pixelBounds(int max, int l1, int l2, int &v1, int &v2);
2363
2364template<>
2365inline void fetchTransformedBilinear_pixelBounds<BlendTransformedBilinearTiled>(int max, int, int, int &v1, int &v2)
2366{
2367 v1 %= max;
2368 if (v1 < 0)
2369 v1 += max;
2370 v2 = v1 + 1;
2371 if (v2 == max)
2372 v2 = 0;
2373 Q_ASSERT(v1 >= 0 && v1 < max);
2374 Q_ASSERT(v2 >= 0 && v2 < max);
2375}
2376
2377template<>
2378inline void fetchTransformedBilinear_pixelBounds<BlendTransformedBilinear>(int, int l1, int l2, int &v1, int &v2)
2379{
2380 if (v1 < l1)
2381 v2 =