1/****************************************************************************
2**
3** Copyright (C) 2018 The Qt Company Ltd.
4** Copyright (C) 2018 Intel Corporation.
5** Contact: https://www.qt.io/licensing/
6**
7** This file is part of the QtGui module of the Qt Toolkit.
8**
9** $QT_BEGIN_LICENSE:LGPL$
10** Commercial License Usage
11** Licensees holding valid commercial Qt licenses may use this file in
12** accordance with the commercial license agreement provided with the
13** Software or, alternatively, in accordance with the terms contained in
14** a written agreement between you and The Qt Company. For licensing terms
15** and conditions see https://www.qt.io/terms-conditions. For further
16** information use the contact form at https://www.qt.io/contact-us.
17**
18** GNU Lesser General Public License Usage
19** Alternatively, this file may be used under the terms of the GNU Lesser
20** General Public License version 3 as published by the Free Software
21** Foundation and appearing in the file LICENSE.LGPL3 included in the
22** packaging of this file. Please review the following information to
23** ensure the GNU Lesser General Public License version 3 requirements
24** will be met: https://www.gnu.org/licenses/lgpl-3.0.html.
25**
26** GNU General Public License Usage
27** Alternatively, this file may be used under the terms of the GNU
28** General Public License version 2.0 or (at your option) the GNU General
29** Public license version 3 or any later version approved by the KDE Free
30** Qt Foundation. The licenses are as published by the Free Software
31** Foundation and appearing in the file LICENSE.GPL2 and LICENSE.GPL3
32** included in the packaging of this file. Please review the following
33** information to ensure the GNU General Public License requirements will
34** be met: https://www.gnu.org/licenses/gpl-2.0.html and
35** https://www.gnu.org/licenses/gpl-3.0.html.
36**
37** $QT_END_LICENSE$
38**
39****************************************************************************/
40
41#include <qglobal.h>
42
43#include <qstylehints.h>
44#include <qguiapplication.h>
45#include <qatomic.h>
46#include <private/qcolortrclut_p.h>
47#include <private/qdrawhelper_p.h>
48#include <private/qpaintengine_raster_p.h>
49#include <private/qpainter_p.h>
50#include <private/qdrawhelper_x86_p.h>
51#include <private/qdrawingprimitive_sse2_p.h>
52#include <private/qdrawhelper_neon_p.h>
53#if defined(QT_COMPILER_SUPPORTS_MIPS_DSP) || defined(QT_COMPILER_SUPPORTS_MIPS_DSPR2)
54#include <private/qdrawhelper_mips_dsp_p.h>
55#endif
56#include <private/qguiapplication_p.h>
57#include <private/qrgba64_p.h>
58#include <qendian.h>
59#include <qloggingcategory.h>
60#include <qmath.h>
61
62QT_BEGIN_NAMESPACE
63
// Logging category object lcQtGuiDrawHelper, registered under the category
// name "qt.gui.drawhelper".
Q_LOGGING_CATEGORY(lcQtGuiDrawHelper, "qt.gui.drawhelper")

// Overwrites src with BYTE_MUL(src, a). Expands to a plain assignment, so
// src must be an lvalue and is evaluated twice.
#define MASK(src, a) src = BYTE_MUL(src, a)
67
68/*
69 constants and structures
70*/
71
// half_point is 2^15; fixed_scale is exactly twice that (2^16).
enum {
    half_point = 0x8000,
    fixed_scale = half_point * 2
};
76
// Per-format channel traits. Each function is declared here and defined only
// through explicit specializations for the packed formats handled below.
// "Width" is the number of bits of a channel, "Shift" the bit position of the
// channel's least significant bit inside the fetched pixel value.
template<QImage::Format> Q_DECL_CONSTEXPR uint redWidth();
template<QImage::Format> Q_DECL_CONSTEXPR uint redShift();
template<QImage::Format> Q_DECL_CONSTEXPR uint greenWidth();
template<QImage::Format> Q_DECL_CONSTEXPR uint greenShift();
template<QImage::Format> Q_DECL_CONSTEXPR uint blueWidth();
template<QImage::Format> Q_DECL_CONSTEXPR uint blueShift();
template<QImage::Format> Q_DECL_CONSTEXPR uint alphaWidth();
template<QImage::Format> Q_DECL_CONSTEXPR uint alphaShift();
85
// Number of bits occupied by the red channel in each packed format.
template<> Q_DECL_CONSTEXPR uint redWidth<QImage::Format_RGB16>() { return 5; }
template<> Q_DECL_CONSTEXPR uint redWidth<QImage::Format_RGB444>() { return 4; }
template<> Q_DECL_CONSTEXPR uint redWidth<QImage::Format_RGB555>() { return 5; }
template<> Q_DECL_CONSTEXPR uint redWidth<QImage::Format_RGB666>() { return 6; }
template<> Q_DECL_CONSTEXPR uint redWidth<QImage::Format_RGB888>() { return 8; }
template<> Q_DECL_CONSTEXPR uint redWidth<QImage::Format_BGR888>() { return 8; }
template<> Q_DECL_CONSTEXPR uint redWidth<QImage::Format_ARGB4444_Premultiplied>() { return 4; }
template<> Q_DECL_CONSTEXPR uint redWidth<QImage::Format_ARGB8555_Premultiplied>() { return 5; }
template<> Q_DECL_CONSTEXPR uint redWidth<QImage::Format_ARGB8565_Premultiplied>() { return 5; }
template<> Q_DECL_CONSTEXPR uint redWidth<QImage::Format_ARGB6666_Premultiplied>() { return 6; }
template<> Q_DECL_CONSTEXPR uint redWidth<QImage::Format_RGBX8888>() { return 8; }
template<> Q_DECL_CONSTEXPR uint redWidth<QImage::Format_RGBA8888>() { return 8; }
template<> Q_DECL_CONSTEXPR uint redWidth<QImage::Format_RGBA8888_Premultiplied>() { return 8; }
99
// Bit position of the lowest red bit inside the fetched pixel value.
// The RGBA8888 family depends on the host byte order.
template<> Q_DECL_CONSTEXPR uint redShift<QImage::Format_RGB16>() { return 11; }
template<> Q_DECL_CONSTEXPR uint redShift<QImage::Format_RGB444>() { return 8; }
template<> Q_DECL_CONSTEXPR uint redShift<QImage::Format_RGB555>() { return 10; }
template<> Q_DECL_CONSTEXPR uint redShift<QImage::Format_RGB666>() { return 12; }
template<> Q_DECL_CONSTEXPR uint redShift<QImage::Format_RGB888>() { return 16; }
template<> Q_DECL_CONSTEXPR uint redShift<QImage::Format_BGR888>() { return 0; }
template<> Q_DECL_CONSTEXPR uint redShift<QImage::Format_ARGB4444_Premultiplied>() { return 8; }
template<> Q_DECL_CONSTEXPR uint redShift<QImage::Format_ARGB8555_Premultiplied>() { return 18; }
template<> Q_DECL_CONSTEXPR uint redShift<QImage::Format_ARGB8565_Premultiplied>() { return 19; }
template<> Q_DECL_CONSTEXPR uint redShift<QImage::Format_ARGB6666_Premultiplied>() { return 12; }
#if Q_BYTE_ORDER == Q_BIG_ENDIAN
template<> Q_DECL_CONSTEXPR uint redShift<QImage::Format_RGBX8888>() { return 24; }
template<> Q_DECL_CONSTEXPR uint redShift<QImage::Format_RGBA8888>() { return 24; }
template<> Q_DECL_CONSTEXPR uint redShift<QImage::Format_RGBA8888_Premultiplied>() { return 24; }
#else
template<> Q_DECL_CONSTEXPR uint redShift<QImage::Format_RGBX8888>() { return 0; }
template<> Q_DECL_CONSTEXPR uint redShift<QImage::Format_RGBA8888>() { return 0; }
template<> Q_DECL_CONSTEXPR uint redShift<QImage::Format_RGBA8888_Premultiplied>() { return 0; }
#endif
// Number of bits occupied by the green channel in each packed format.
template<> Q_DECL_CONSTEXPR uint greenWidth<QImage::Format_RGB16>() { return 6; }
template<> Q_DECL_CONSTEXPR uint greenWidth<QImage::Format_RGB444>() { return 4; }
template<> Q_DECL_CONSTEXPR uint greenWidth<QImage::Format_RGB555>() { return 5; }
template<> Q_DECL_CONSTEXPR uint greenWidth<QImage::Format_RGB666>() { return 6; }
template<> Q_DECL_CONSTEXPR uint greenWidth<QImage::Format_RGB888>() { return 8; }
template<> Q_DECL_CONSTEXPR uint greenWidth<QImage::Format_BGR888>() { return 8; }
template<> Q_DECL_CONSTEXPR uint greenWidth<QImage::Format_ARGB4444_Premultiplied>() { return 4; }
template<> Q_DECL_CONSTEXPR uint greenWidth<QImage::Format_ARGB8555_Premultiplied>() { return 5; }
template<> Q_DECL_CONSTEXPR uint greenWidth<QImage::Format_ARGB8565_Premultiplied>() { return 6; }
template<> Q_DECL_CONSTEXPR uint greenWidth<QImage::Format_ARGB6666_Premultiplied>() { return 6; }
template<> Q_DECL_CONSTEXPR uint greenWidth<QImage::Format_RGBX8888>() { return 8; }
template<> Q_DECL_CONSTEXPR uint greenWidth<QImage::Format_RGBA8888>() { return 8; }
template<> Q_DECL_CONSTEXPR uint greenWidth<QImage::Format_RGBA8888_Premultiplied>() { return 8; }
132
// Bit position of the lowest green bit inside the fetched pixel value.
// The RGBA8888 family depends on the host byte order.
template<> Q_DECL_CONSTEXPR uint greenShift<QImage::Format_RGB16>() { return 5; }
template<> Q_DECL_CONSTEXPR uint greenShift<QImage::Format_RGB444>() { return 4; }
template<> Q_DECL_CONSTEXPR uint greenShift<QImage::Format_RGB555>() { return 5; }
template<> Q_DECL_CONSTEXPR uint greenShift<QImage::Format_RGB666>() { return 6; }
template<> Q_DECL_CONSTEXPR uint greenShift<QImage::Format_RGB888>() { return 8; }
template<> Q_DECL_CONSTEXPR uint greenShift<QImage::Format_BGR888>() { return 8; }
template<> Q_DECL_CONSTEXPR uint greenShift<QImage::Format_ARGB4444_Premultiplied>() { return 4; }
template<> Q_DECL_CONSTEXPR uint greenShift<QImage::Format_ARGB8555_Premultiplied>() { return 13; }
template<> Q_DECL_CONSTEXPR uint greenShift<QImage::Format_ARGB8565_Premultiplied>() { return 13; }
template<> Q_DECL_CONSTEXPR uint greenShift<QImage::Format_ARGB6666_Premultiplied>() { return 6; }
#if Q_BYTE_ORDER == Q_BIG_ENDIAN
template<> Q_DECL_CONSTEXPR uint greenShift<QImage::Format_RGBX8888>() { return 16; }
template<> Q_DECL_CONSTEXPR uint greenShift<QImage::Format_RGBA8888>() { return 16; }
template<> Q_DECL_CONSTEXPR uint greenShift<QImage::Format_RGBA8888_Premultiplied>() { return 16; }
#else
template<> Q_DECL_CONSTEXPR uint greenShift<QImage::Format_RGBX8888>() { return 8; }
template<> Q_DECL_CONSTEXPR uint greenShift<QImage::Format_RGBA8888>() { return 8; }
template<> Q_DECL_CONSTEXPR uint greenShift<QImage::Format_RGBA8888_Premultiplied>() { return 8; }
#endif
// Number of bits occupied by the blue channel in each packed format.
template<> Q_DECL_CONSTEXPR uint blueWidth<QImage::Format_RGB16>() { return 5; }
template<> Q_DECL_CONSTEXPR uint blueWidth<QImage::Format_RGB444>() { return 4; }
template<> Q_DECL_CONSTEXPR uint blueWidth<QImage::Format_RGB555>() { return 5; }
template<> Q_DECL_CONSTEXPR uint blueWidth<QImage::Format_RGB666>() { return 6; }
template<> Q_DECL_CONSTEXPR uint blueWidth<QImage::Format_RGB888>() { return 8; }
template<> Q_DECL_CONSTEXPR uint blueWidth<QImage::Format_BGR888>() { return 8; }
template<> Q_DECL_CONSTEXPR uint blueWidth<QImage::Format_ARGB4444_Premultiplied>() { return 4; }
template<> Q_DECL_CONSTEXPR uint blueWidth<QImage::Format_ARGB8555_Premultiplied>() { return 5; }
template<> Q_DECL_CONSTEXPR uint blueWidth<QImage::Format_ARGB8565_Premultiplied>() { return 5; }
template<> Q_DECL_CONSTEXPR uint blueWidth<QImage::Format_ARGB6666_Premultiplied>() { return 6; }
template<> Q_DECL_CONSTEXPR uint blueWidth<QImage::Format_RGBX8888>() { return 8; }
template<> Q_DECL_CONSTEXPR uint blueWidth<QImage::Format_RGBA8888>() { return 8; }
template<> Q_DECL_CONSTEXPR uint blueWidth<QImage::Format_RGBA8888_Premultiplied>() { return 8; }
165
// Bit position of the lowest blue bit inside the fetched pixel value.
// The RGBA8888 family depends on the host byte order.
template<> Q_DECL_CONSTEXPR uint blueShift<QImage::Format_RGB16>() { return 0; }
template<> Q_DECL_CONSTEXPR uint blueShift<QImage::Format_RGB444>() { return 0; }
template<> Q_DECL_CONSTEXPR uint blueShift<QImage::Format_RGB555>() { return 0; }
template<> Q_DECL_CONSTEXPR uint blueShift<QImage::Format_RGB666>() { return 0; }
template<> Q_DECL_CONSTEXPR uint blueShift<QImage::Format_RGB888>() { return 0; }
template<> Q_DECL_CONSTEXPR uint blueShift<QImage::Format_BGR888>() { return 16; }
template<> Q_DECL_CONSTEXPR uint blueShift<QImage::Format_ARGB4444_Premultiplied>() { return 0; }
template<> Q_DECL_CONSTEXPR uint blueShift<QImage::Format_ARGB8555_Premultiplied>() { return 8; }
template<> Q_DECL_CONSTEXPR uint blueShift<QImage::Format_ARGB8565_Premultiplied>() { return 8; }
template<> Q_DECL_CONSTEXPR uint blueShift<QImage::Format_ARGB6666_Premultiplied>() { return 0; }
#if Q_BYTE_ORDER == Q_BIG_ENDIAN
template<> Q_DECL_CONSTEXPR uint blueShift<QImage::Format_RGBX8888>() { return 8; }
template<> Q_DECL_CONSTEXPR uint blueShift<QImage::Format_RGBA8888>() { return 8; }
template<> Q_DECL_CONSTEXPR uint blueShift<QImage::Format_RGBA8888_Premultiplied>() { return 8; }
#else
template<> Q_DECL_CONSTEXPR uint blueShift<QImage::Format_RGBX8888>() { return 16; }
template<> Q_DECL_CONSTEXPR uint blueShift<QImage::Format_RGBA8888>() { return 16; }
template<> Q_DECL_CONSTEXPR uint blueShift<QImage::Format_RGBA8888_Premultiplied>() { return 16; }
#endif
// Number of bits occupied by the alpha channel; 0 for the formats that are
// given no alpha bits here.
template<> Q_DECL_CONSTEXPR uint alphaWidth<QImage::Format_RGB16>() { return 0; }
template<> Q_DECL_CONSTEXPR uint alphaWidth<QImage::Format_RGB444>() { return 0; }
template<> Q_DECL_CONSTEXPR uint alphaWidth<QImage::Format_RGB555>() { return 0; }
template<> Q_DECL_CONSTEXPR uint alphaWidth<QImage::Format_RGB666>() { return 0; }
template<> Q_DECL_CONSTEXPR uint alphaWidth<QImage::Format_RGB888>() { return 0; }
template<> Q_DECL_CONSTEXPR uint alphaWidth<QImage::Format_BGR888>() { return 0; }
template<> Q_DECL_CONSTEXPR uint alphaWidth<QImage::Format_ARGB4444_Premultiplied>() { return 4; }
template<> Q_DECL_CONSTEXPR uint alphaWidth<QImage::Format_ARGB8555_Premultiplied>() { return 8; }
template<> Q_DECL_CONSTEXPR uint alphaWidth<QImage::Format_ARGB8565_Premultiplied>() { return 8; }
template<> Q_DECL_CONSTEXPR uint alphaWidth<QImage::Format_ARGB6666_Premultiplied>() { return 6; }
template<> Q_DECL_CONSTEXPR uint alphaWidth<QImage::Format_RGBX8888>() { return 0; }
template<> Q_DECL_CONSTEXPR uint alphaWidth<QImage::Format_RGBA8888>() { return 8; }
template<> Q_DECL_CONSTEXPR uint alphaWidth<QImage::Format_RGBA8888_Premultiplied>() { return 8; }
198
// Bit position of the lowest alpha bit inside the fetched pixel value.
// The RGBA8888 family depends on the host byte order.
template<> Q_DECL_CONSTEXPR uint alphaShift<QImage::Format_RGB16>() { return 0; }
template<> Q_DECL_CONSTEXPR uint alphaShift<QImage::Format_RGB444>() { return 0; }
template<> Q_DECL_CONSTEXPR uint alphaShift<QImage::Format_RGB555>() { return 0; }
template<> Q_DECL_CONSTEXPR uint alphaShift<QImage::Format_RGB666>() { return 0; }
template<> Q_DECL_CONSTEXPR uint alphaShift<QImage::Format_RGB888>() { return 0; }
template<> Q_DECL_CONSTEXPR uint alphaShift<QImage::Format_BGR888>() { return 0; }
template<> Q_DECL_CONSTEXPR uint alphaShift<QImage::Format_ARGB4444_Premultiplied>() { return 12; }
template<> Q_DECL_CONSTEXPR uint alphaShift<QImage::Format_ARGB8555_Premultiplied>() { return 0; }
template<> Q_DECL_CONSTEXPR uint alphaShift<QImage::Format_ARGB8565_Premultiplied>() { return 0; }
template<> Q_DECL_CONSTEXPR uint alphaShift<QImage::Format_ARGB6666_Premultiplied>() { return 18; }
#if Q_BYTE_ORDER == Q_BIG_ENDIAN
template<> Q_DECL_CONSTEXPR uint alphaShift<QImage::Format_RGBX8888>() { return 0; }
template<> Q_DECL_CONSTEXPR uint alphaShift<QImage::Format_RGBA8888>() { return 0; }
template<> Q_DECL_CONSTEXPR uint alphaShift<QImage::Format_RGBA8888_Premultiplied>() { return 0; }
#else
template<> Q_DECL_CONSTEXPR uint alphaShift<QImage::Format_RGBX8888>() { return 24; }
template<> Q_DECL_CONSTEXPR uint alphaShift<QImage::Format_RGBA8888>() { return 24; }
template<> Q_DECL_CONSTEXPR uint alphaShift<QImage::Format_RGBA8888_Premultiplied>() { return 24; }
#endif
218
// Maps each packed format to the QPixelLayout::BPP storage class that is used
// to select the matching fetchPixel()/storePixel() specialization.
template<QImage::Format> constexpr QPixelLayout::BPP bitsPerPixel();
template<> constexpr QPixelLayout::BPP bitsPerPixel<QImage::Format_RGB16>() { return QPixelLayout::BPP16; }
template<> constexpr QPixelLayout::BPP bitsPerPixel<QImage::Format_RGB444>() { return QPixelLayout::BPP16; }
template<> constexpr QPixelLayout::BPP bitsPerPixel<QImage::Format_RGB555>() { return QPixelLayout::BPP16; }
template<> constexpr QPixelLayout::BPP bitsPerPixel<QImage::Format_RGB666>() { return QPixelLayout::BPP24; }
template<> constexpr QPixelLayout::BPP bitsPerPixel<QImage::Format_RGB888>() { return QPixelLayout::BPP24; }
template<> constexpr QPixelLayout::BPP bitsPerPixel<QImage::Format_BGR888>() { return QPixelLayout::BPP24; }
template<> constexpr QPixelLayout::BPP bitsPerPixel<QImage::Format_ARGB4444_Premultiplied>() { return QPixelLayout::BPP16; }
template<> constexpr QPixelLayout::BPP bitsPerPixel<QImage::Format_ARGB8555_Premultiplied>() { return QPixelLayout::BPP24; }
template<> constexpr QPixelLayout::BPP bitsPerPixel<QImage::Format_ARGB8565_Premultiplied>() { return QPixelLayout::BPP24; }
template<> constexpr QPixelLayout::BPP bitsPerPixel<QImage::Format_ARGB6666_Premultiplied>() { return QPixelLayout::BPP24; }
template<> constexpr QPixelLayout::BPP bitsPerPixel<QImage::Format_RGBX8888>() { return QPixelLayout::BPP32; }
template<> constexpr QPixelLayout::BPP bitsPerPixel<QImage::Format_RGBA8888>() { return QPixelLayout::BPP32; }
template<> constexpr QPixelLayout::BPP bitsPerPixel<QImage::Format_RGBA8888_Premultiplied>() { return QPixelLayout::BPP32; }
233
234
235typedef const uint *(QT_FASTCALL *FetchPixelsFunc)(uint *buffer, const uchar *src, int index, int count);
236
237template <QPixelLayout::BPP bpp> static
238uint QT_FASTCALL fetchPixel(const uchar *, int)
239{
240 Q_UNREACHABLE();
241 return 0;
242}
243
244template <>
245inline uint QT_FASTCALL fetchPixel<QPixelLayout::BPP1LSB>(const uchar *src, int index)
246{
247 return (src[index >> 3] >> (index & 7)) & 1;
248}
249
250template <>
251inline uint QT_FASTCALL fetchPixel<QPixelLayout::BPP1MSB>(const uchar *src, int index)
252{
253 return (src[index >> 3] >> (~index & 7)) & 1;
254}
255
256template <>
257inline uint QT_FASTCALL fetchPixel<QPixelLayout::BPP8>(const uchar *src, int index)
258{
259 return src[index];
260}
261
262template <>
263inline uint QT_FASTCALL fetchPixel<QPixelLayout::BPP16>(const uchar *src, int index)
264{
265 return reinterpret_cast<const quint16 *>(src)[index];
266}
267
268template <>
269inline uint QT_FASTCALL fetchPixel<QPixelLayout::BPP24>(const uchar *src, int index)
270{
271 return reinterpret_cast<const quint24 *>(src)[index];
272}
273
274template <>
275inline uint QT_FASTCALL fetchPixel<QPixelLayout::BPP32>(const uchar *src, int index)
276{
277 return reinterpret_cast<const uint *>(src)[index];
278}
279
280template <>
281inline uint QT_FASTCALL fetchPixel<QPixelLayout::BPP64>(const uchar *src, int index)
282{
283 // We have to do the conversion in fetch to fit into a 32bit uint
284 QRgba64 c = reinterpret_cast<const QRgba64 *>(src)[index];
285 return c.toArgb32();
286}
287
288template <QPixelLayout::BPP bpp>
289static quint64 QT_FASTCALL fetchPixel64(const uchar *src, int index)
290{
291 Q_STATIC_ASSERT(bpp != QPixelLayout::BPP64);
292 return fetchPixel<bpp>(src, index);
293}
294
295template <QPixelLayout::BPP width> static
296void QT_FASTCALL storePixel(uchar *dest, int index, uint pixel);
297
298template <>
299inline void QT_FASTCALL storePixel<QPixelLayout::BPP16>(uchar *dest, int index, uint pixel)
300{
301 reinterpret_cast<quint16 *>(dest)[index] = quint16(pixel);
302}
303
304template <>
305inline void QT_FASTCALL storePixel<QPixelLayout::BPP24>(uchar *dest, int index, uint pixel)
306{
307 reinterpret_cast<quint24 *>(dest)[index] = quint24(pixel);
308}
309
310typedef uint (QT_FASTCALL *FetchPixelFunc)(const uchar *src, int index);
311
312static const FetchPixelFunc qFetchPixel[QPixelLayout::BPPCount] = {
313 nullptr, // BPPNone
314 fetchPixel<QPixelLayout::BPP1MSB>, // BPP1MSB
315 fetchPixel<QPixelLayout::BPP1LSB>, // BPP1LSB
316 fetchPixel<QPixelLayout::BPP8>, // BPP8
317 fetchPixel<QPixelLayout::BPP16>, // BPP16
318 fetchPixel<QPixelLayout::BPP24>, // BPP24
319 fetchPixel<QPixelLayout::BPP32>, // BPP32
320 fetchPixel<QPixelLayout::BPP64> // BPP64
321};
322
323template<QImage::Format Format>
324static Q_ALWAYS_INLINE uint convertPixelToRGB32(uint s)
325{
326 Q_CONSTEXPR uint redMask = ((1 << redWidth<Format>()) - 1);
327 Q_CONSTEXPR uint greenMask = ((1 << greenWidth<Format>()) - 1);
328 Q_CONSTEXPR uint blueMask = ((1 << blueWidth<Format>()) - 1);
329
330 Q_CONSTEXPR uchar redLeftShift = 8 - redWidth<Format>();
331 Q_CONSTEXPR uchar greenLeftShift = 8 - greenWidth<Format>();
332 Q_CONSTEXPR uchar blueLeftShift = 8 - blueWidth<Format>();
333
334 Q_CONSTEXPR uchar redRightShift = 2 * redWidth<Format>() - 8;
335 Q_CONSTEXPR uchar greenRightShift = 2 * greenWidth<Format>() - 8;
336 Q_CONSTEXPR uchar blueRightShift = 2 * blueWidth<Format>() - 8;
337
338 uint red = (s >> redShift<Format>()) & redMask;
339 uint green = (s >> greenShift<Format>()) & greenMask;
340 uint blue = (s >> blueShift<Format>()) & blueMask;
341
342 red = ((red << redLeftShift) | (red >> redRightShift)) << 16;
343 green = ((green << greenLeftShift) | (green >> greenRightShift)) << 8;
344 blue = (blue << blueLeftShift) | (blue >> blueRightShift);
345 return 0xff000000 | red | green | blue;
346}
347
348template<QImage::Format Format>
349static void QT_FASTCALL convertToRGB32(uint *buffer, int count, const QVector<QRgb> *)
350{
351 for (int i = 0; i < count; ++i)
352 buffer[i] = convertPixelToRGB32<Format>(buffer[i]);
353}
354
#if defined(__SSE2__) && !defined(__SSSE3__) && QT_COMPILER_SUPPORTS_SSSE3
// Declared here, defined outside the visible source. Only available when
// this file is compiled with SSE2 but without SSSE3 while the compiler is
// still able to build SSSE3 code; the callers in this file test
// qCpuHasFeature(SSSE3) at run time before using it.
extern const uint * QT_FASTCALL fetchPixelsBPP24_ssse3(uint *dest, const uchar*src, int index, int count);
#endif
358
359template<QImage::Format Format>
360static const uint *QT_FASTCALL fetchRGBToRGB32(uint *buffer, const uchar *src, int index, int count,
361 const QVector<QRgb> *, QDitherInfo *)
362{
363 constexpr QPixelLayout::BPP BPP = bitsPerPixel<Format>();
364#if defined(__SSE2__) && !defined(__SSSE3__) && QT_COMPILER_SUPPORTS_SSSE3
365 if (BPP == QPixelLayout::BPP24 && qCpuHasFeature(SSSE3)) {
366 // With SSE2 can convertToRGB32 be vectorized, but it takes SSSE3
367 // to vectorize the deforested version below.
368 fetchPixelsBPP24_ssse3(dest: buffer, src, index, count);
369 convertToRGB32<Format>(buffer, count, nullptr);
370 return buffer;
371 }
372#endif
373 for (int i = 0; i < count; ++i)
374 buffer[i] = convertPixelToRGB32<Format>(fetchPixel<BPP>(src, index + i));
375 return buffer;
376}
377
378template<QImage::Format Format>
379static Q_ALWAYS_INLINE QRgba64 convertPixelToRGB64(uint s)
380{
381 return QRgba64::fromArgb32(rgb: convertPixelToRGB32<Format>(s));
382}
383
384template<QImage::Format Format>
385static const QRgba64 *QT_FASTCALL convertToRGB64(QRgba64 *buffer, const uint *src, int count,
386 const QVector<QRgb> *, QDitherInfo *)
387{
388 for (int i = 0; i < count; ++i)
389 buffer[i] = convertPixelToRGB64<Format>(src[i]);
390 return buffer;
391}
392
393template<QImage::Format Format>
394static const QRgba64 *QT_FASTCALL fetchRGBToRGB64(QRgba64 *buffer, const uchar *src, int index, int count,
395 const QVector<QRgb> *, QDitherInfo *)
396{
397 for (int i = 0; i < count; ++i)
398 buffer[i] = convertPixelToRGB64<Format>(fetchPixel<bitsPerPixel<Format>()>(src, index + i));
399 return buffer;
400}
401
402template<QImage::Format Format>
403static Q_ALWAYS_INLINE uint convertPixelToARGB32PM(uint s)
404{
405 Q_CONSTEXPR uint alphaMask = ((1 << alphaWidth<Format>()) - 1);
406 Q_CONSTEXPR uint redMask = ((1 << redWidth<Format>()) - 1);
407 Q_CONSTEXPR uint greenMask = ((1 << greenWidth<Format>()) - 1);
408 Q_CONSTEXPR uint blueMask = ((1 << blueWidth<Format>()) - 1);
409
410 Q_CONSTEXPR uchar alphaLeftShift = 8 - alphaWidth<Format>();
411 Q_CONSTEXPR uchar redLeftShift = 8 - redWidth<Format>();
412 Q_CONSTEXPR uchar greenLeftShift = 8 - greenWidth<Format>();
413 Q_CONSTEXPR uchar blueLeftShift = 8 - blueWidth<Format>();
414
415 Q_CONSTEXPR uchar alphaRightShift = 2 * alphaWidth<Format>() - 8;
416 Q_CONSTEXPR uchar redRightShift = 2 * redWidth<Format>() - 8;
417 Q_CONSTEXPR uchar greenRightShift = 2 * greenWidth<Format>() - 8;
418 Q_CONSTEXPR uchar blueRightShift = 2 * blueWidth<Format>() - 8;
419
420 Q_CONSTEXPR bool mustMin = (alphaWidth<Format>() != redWidth<Format>()) ||
421 (alphaWidth<Format>() != greenWidth<Format>()) ||
422 (alphaWidth<Format>() != blueWidth<Format>());
423
424 uint alpha = (s >> alphaShift<Format>()) & alphaMask;
425 uint red = (s >> redShift<Format>()) & redMask;
426 uint green = (s >> greenShift<Format>()) & greenMask;
427 uint blue = (s >> blueShift<Format>()) & blueMask;
428
429 alpha = (alpha << alphaLeftShift) | (alpha >> alphaRightShift);
430 red = (red << redLeftShift) | (red >> redRightShift);
431 green = (green << greenLeftShift) | (green >> greenRightShift);
432 blue = (blue << blueLeftShift) | (blue >> blueRightShift);
433
434 if (mustMin) {
435 red = qMin(a: alpha, b: red);
436 green = qMin(a: alpha, b: green);
437 blue = qMin(a: alpha, b: blue);
438 }
439
440 return (alpha << 24) | (red << 16) | (green << 8) | blue;
441}
442
443template<QImage::Format Format>
444static void QT_FASTCALL convertARGBPMToARGB32PM(uint *buffer, int count, const QVector<QRgb> *)
445{
446 for (int i = 0; i < count; ++i)
447 buffer[i] = convertPixelToARGB32PM<Format>(buffer[i]);
448}
449
450template<QImage::Format Format>
451static const uint *QT_FASTCALL fetchARGBPMToARGB32PM(uint *buffer, const uchar *src, int index, int count,
452 const QVector<QRgb> *, QDitherInfo *)
453{
454 constexpr QPixelLayout::BPP BPP = bitsPerPixel<Format>();
455#if defined(__SSE2__) && !defined(__SSSE3__) && QT_COMPILER_SUPPORTS_SSSE3
456 if (BPP == QPixelLayout::BPP24 && qCpuHasFeature(SSSE3)) {
457 // With SSE2 can convertToRGB32 be vectorized, but it takes SSSE3
458 // to vectorize the deforested version below.
459 fetchPixelsBPP24_ssse3(dest: buffer, src, index, count);
460 convertARGBPMToARGB32PM<Format>(buffer, count, nullptr);
461 return buffer;
462 }
463#endif
464 for (int i = 0; i < count; ++i)
465 buffer[i] = convertPixelToARGB32PM<Format>(fetchPixel<BPP>(src, index + i));
466 return buffer;
467}
468
469template<QImage::Format Format>
470static Q_ALWAYS_INLINE QRgba64 convertPixelToRGBA64PM(uint s)
471{
472 return QRgba64::fromArgb32(rgb: convertPixelToARGB32PM<Format>(s));
473}
474
475template<QImage::Format Format>
476static const QRgba64 *QT_FASTCALL convertARGBPMToRGBA64PM(QRgba64 *buffer, const uint *src, int count,
477 const QVector<QRgb> *, QDitherInfo *)
478{
479 for (int i = 0; i < count; ++i)
480 buffer[i] = convertPixelToRGB64<Format>(src[i]);
481 return buffer;
482}
483
484template<QImage::Format Format>
485static const QRgba64 *QT_FASTCALL fetchARGBPMToRGBA64PM(QRgba64 *buffer, const uchar *src, int index, int count,
486 const QVector<QRgb> *, QDitherInfo *)
487{
488 constexpr QPixelLayout::BPP bpp = bitsPerPixel<Format>();
489 for (int i = 0; i < count; ++i)
490 buffer[i] = convertPixelToRGBA64PM<Format>(fetchPixel<bpp>(src, index + i));
491 return buffer;
492}
493
494template<QImage::Format Format, bool fromRGB>
495static void QT_FASTCALL storeRGBFromARGB32PM(uchar *dest, const uint *src, int index, int count,
496 const QVector<QRgb> *, QDitherInfo *dither)
497{
498 Q_CONSTEXPR uchar rWidth = redWidth<Format>();
499 Q_CONSTEXPR uchar gWidth = greenWidth<Format>();
500 Q_CONSTEXPR uchar bWidth = blueWidth<Format>();
501 constexpr QPixelLayout::BPP BPP = bitsPerPixel<Format>();
502
503 // RGB32 -> RGB888 is not a precision loss.
504 if (!dither || (rWidth == 8 && gWidth == 8 && bWidth == 8)) {
505 Q_CONSTEXPR uint rMask = (1 << redWidth<Format>()) - 1;
506 Q_CONSTEXPR uint gMask = (1 << greenWidth<Format>()) - 1;
507 Q_CONSTEXPR uint bMask = (1 << blueWidth<Format>()) - 1;
508 Q_CONSTEXPR uchar rRightShift = 24 - redWidth<Format>();
509 Q_CONSTEXPR uchar gRightShift = 16 - greenWidth<Format>();
510 Q_CONSTEXPR uchar bRightShift = 8 - blueWidth<Format>();
511
512 for (int i = 0; i < count; ++i) {
513 const uint c = fromRGB ? src[i] : qUnpremultiply(p: src[i]);
514 const uint r = ((c >> rRightShift) & rMask) << redShift<Format>();
515 const uint g = ((c >> gRightShift) & gMask) << greenShift<Format>();
516 const uint b = ((c >> bRightShift) & bMask) << blueShift<Format>();
517 storePixel<BPP>(dest, index + i, r | g | b);
518 };
519 } else {
520 // We do ordered dither by using a rounding conversion, but instead of
521 // adding half of input precision, we add the adjusted result from the
522 // bayer matrix before narrowing.
523 // Note: Rounding conversion in itself is different from the naive
524 // conversion we do above for non-dithering.
525 const uint *bayer_line = qt_bayer_matrix[dither->y & 15];
526 for (int i = 0; i < count; ++i) {
527 const uint c = fromRGB ? src[i] : qUnpremultiply(p: src[i]);
528 const int d = bayer_line[(dither->x + i) & 15];
529 const int dr = d - ((d + 1) >> rWidth);
530 const int dg = d - ((d + 1) >> gWidth);
531 const int db = d - ((d + 1) >> bWidth);
532 int r = qRed(rgb: c);
533 int g = qGreen(rgb: c);
534 int b = qBlue(rgb: c);
535 r = (r + ((dr - r) >> rWidth) + 1) >> (8 - rWidth);
536 g = (g + ((dg - g) >> gWidth) + 1) >> (8 - gWidth);
537 b = (b + ((db - b) >> bWidth) + 1) >> (8 - bWidth);
538 const uint s = (r << redShift<Format>())
539 | (g << greenShift<Format>())
540 | (b << blueShift<Format>());
541 storePixel<BPP>(dest, index + i, s);
542 }
543 }
544}
545
546template<QImage::Format Format, bool fromRGB>
547static void QT_FASTCALL storeARGBPMFromARGB32PM(uchar *dest, const uint *src, int index, int count,
548 const QVector<QRgb> *, QDitherInfo *dither)
549{
550 constexpr QPixelLayout::BPP BPP = bitsPerPixel<Format>();
551 if (!dither) {
552 Q_CONSTEXPR uint aMask = (1 << alphaWidth<Format>()) - 1;
553 Q_CONSTEXPR uint rMask = (1 << redWidth<Format>()) - 1;
554 Q_CONSTEXPR uint gMask = (1 << greenWidth<Format>()) - 1;
555 Q_CONSTEXPR uint bMask = (1 << blueWidth<Format>()) - 1;
556
557 Q_CONSTEXPR uchar aRightShift = 32 - alphaWidth<Format>();
558 Q_CONSTEXPR uchar rRightShift = 24 - redWidth<Format>();
559 Q_CONSTEXPR uchar gRightShift = 16 - greenWidth<Format>();
560 Q_CONSTEXPR uchar bRightShift = 8 - blueWidth<Format>();
561
562 Q_CONSTEXPR uint aOpaque = aMask << alphaShift<Format>();
563 for (int i = 0; i < count; ++i) {
564 const uint c = src[i];
565 const uint a = fromRGB ? aOpaque : (((c >> aRightShift) & aMask) << alphaShift<Format>());
566 const uint r = ((c >> rRightShift) & rMask) << redShift<Format>();
567 const uint g = ((c >> gRightShift) & gMask) << greenShift<Format>();
568 const uint b = ((c >> bRightShift) & bMask) << blueShift<Format>();
569 storePixel<BPP>(dest, index + i, a | r | g | b);
570 };
571 } else {
572 Q_CONSTEXPR uchar aWidth = alphaWidth<Format>();
573 Q_CONSTEXPR uchar rWidth = redWidth<Format>();
574 Q_CONSTEXPR uchar gWidth = greenWidth<Format>();
575 Q_CONSTEXPR uchar bWidth = blueWidth<Format>();
576
577 const uint *bayer_line = qt_bayer_matrix[dither->y & 15];
578 for (int i = 0; i < count; ++i) {
579 const uint c = src[i];
580 const int d = bayer_line[(dither->x + i) & 15];
581 const int da = d - ((d + 1) >> aWidth);
582 const int dr = d - ((d + 1) >> rWidth);
583 const int dg = d - ((d + 1) >> gWidth);
584 const int db = d - ((d + 1) >> bWidth);
585 int a = qAlpha(rgb: c);
586 int r = qRed(rgb: c);
587 int g = qGreen(rgb: c);
588 int b = qBlue(rgb: c);
589 if (fromRGB)
590 a = (1 << aWidth) - 1;
591 else
592 a = (a + ((da - a) >> aWidth) + 1) >> (8 - aWidth);
593 r = (r + ((dr - r) >> rWidth) + 1) >> (8 - rWidth);
594 g = (g + ((dg - g) >> gWidth) + 1) >> (8 - gWidth);
595 b = (b + ((db - b) >> bWidth) + 1) >> (8 - bWidth);
596 uint s = (a << alphaShift<Format>())
597 | (r << redShift<Format>())
598 | (g << greenShift<Format>())
599 | (b << blueShift<Format>());
600 storePixel<BPP>(dest, index + i, s);
601 }
602 }
603}
604
605template<QImage::Format Format>
606static void QT_FASTCALL rbSwap(uchar *dst, const uchar *src, int count)
607{
608 Q_CONSTEXPR uchar aWidth = alphaWidth<Format>();
609 Q_CONSTEXPR uchar aShift = alphaShift<Format>();
610 Q_CONSTEXPR uchar rWidth = redWidth<Format>();
611 Q_CONSTEXPR uchar rShift = redShift<Format>();
612 Q_CONSTEXPR uchar gWidth = greenWidth<Format>();
613 Q_CONSTEXPR uchar gShift = greenShift<Format>();
614 Q_CONSTEXPR uchar bWidth = blueWidth<Format>();
615 Q_CONSTEXPR uchar bShift = blueShift<Format>();
616#ifdef Q_COMPILER_CONSTEXPR
617 Q_STATIC_ASSERT(rWidth == bWidth);
618#endif
619 Q_CONSTEXPR uint redBlueMask = (1 << rWidth) - 1;
620 Q_CONSTEXPR uint alphaGreenMask = (((1 << aWidth) - 1) << aShift)
621 | (((1 << gWidth) - 1) << gShift);
622 constexpr QPixelLayout::BPP bpp = bitsPerPixel<Format>();
623
624 for (int i = 0; i < count; ++i) {
625 const uint c = fetchPixel<bpp>(src, i);
626 const uint r = (c >> rShift) & redBlueMask;
627 const uint b = (c >> bShift) & redBlueMask;
628 const uint t = (c & alphaGreenMask)
629 | (r << bShift)
630 | (b << rShift);
631 storePixel<bpp>(dst, i, t);
632 }
633}
634
635static void QT_FASTCALL rbSwap_rgb32(uchar *d, const uchar *s, int count)
636{
637 const uint *src = reinterpret_cast<const uint *>(s);
638 uint *dest = reinterpret_cast<uint *>(d);
639 for (int i = 0; i < count; ++i) {
640 const uint c = src[i];
641 const uint ag = c & 0xff00ff00;
642 const uint rb = c & 0x00ff00ff;
643 dest[i] = ag | (rb << 16) | (rb >> 16);
644 }
645}
646
647#if Q_BYTE_ORDER == Q_LITTLE_ENDIAN
648template<>
649void QT_FASTCALL rbSwap<QImage::Format_RGBA8888>(uchar *d, const uchar *s, int count)
650{
651 return rbSwap_rgb32(d, s, count);
652}
653#else
654template<>
655void QT_FASTCALL rbSwap<QImage::Format_RGBA8888>(uchar *d, const uchar *s, int count)
656{
657 const uint *src = reinterpret_cast<const uint *>(s);
658 uint *dest = reinterpret_cast<uint *>(d);
659 for (int i = 0; i < count; ++i) {
660 const uint c = src[i];
661 const uint rb = c & 0xff00ff00;
662 const uint ga = c & 0x00ff00ff;
663 dest[i] = ga | (rb << 16) | (rb >> 16);
664 }
665}
666#endif
667
668static void QT_FASTCALL rbSwap_rgb30(uchar *d, const uchar *s, int count)
669{
670 const uint *src = reinterpret_cast<const uint *>(s);
671 uint *dest = reinterpret_cast<uint *>(d);
672 UNALIASED_CONVERSION_LOOP(dest, src, count, qRgbSwapRgb30);
673}
674
675template<QImage::Format Format> Q_DECL_CONSTEXPR static inline QPixelLayout pixelLayoutRGB()
676{
677 return QPixelLayout{
678 false,
679 false,
680 bitsPerPixel<Format>(),
681 rbSwap<Format>,
682 convertToRGB32<Format>,
683 convertToRGB64<Format>,
684 fetchRGBToRGB32<Format>,
685 fetchRGBToRGB64<Format>,
686 storeRGBFromARGB32PM<Format, false>,
687 storeRGBFromARGB32PM<Format, true>
688 };
689}
690
691template<QImage::Format Format> Q_DECL_CONSTEXPR static inline QPixelLayout pixelLayoutARGBPM()
692{
693 return QPixelLayout{
694 true,
695 true,
696 bitsPerPixel<Format>(),
697 rbSwap<Format>,
698 convertARGBPMToARGB32PM<Format>,
699 convertARGBPMToRGBA64PM<Format>,
700 fetchARGBPMToARGB32PM<Format>,
701 fetchARGBPMToRGBA64PM<Format>,
702 storeARGBPMFromARGB32PM<Format, false>,
703 storeARGBPMFromARGB32PM<Format, true>
704 };
705}
706
707static void QT_FASTCALL convertIndexedToARGB32PM(uint *buffer, int count, const QVector<QRgb> *clut)
708{
709 for (int i = 0; i < count; ++i)
710 buffer[i] = qPremultiply(x: clut->at(i: buffer[i]));
711}
712
713template<QPixelLayout::BPP BPP>
714static const uint *QT_FASTCALL fetchIndexedToARGB32PM(uint *buffer, const uchar *src, int index, int count,
715 const QVector<QRgb> *clut, QDitherInfo *)
716{
717 for (int i = 0; i < count; ++i) {
718 const uint s = fetchPixel<BPP>(src, index + i);
719 buffer[i] = qPremultiply(x: clut->at(i: s));
720 }
721 return buffer;
722}
723
724template<QPixelLayout::BPP BPP>
725static const QRgba64 *QT_FASTCALL fetchIndexedToRGBA64PM(QRgba64 *buffer, const uchar *src, int index, int count,
726 const QVector<QRgb> *clut, QDitherInfo *)
727{
728 for (int i = 0; i < count; ++i) {
729 const uint s = fetchPixel<BPP>(src, index + i);
730 buffer[i] = QRgba64::fromArgb32(rgb: clut->at(i: s)).premultiplied();
731 }
732 return buffer;
733}
734
735static const QRgba64 *QT_FASTCALL convertIndexedToRGBA64PM(QRgba64 *buffer, const uint *src, int count,
736 const QVector<QRgb> *clut, QDitherInfo *)
737{
738 for (int i = 0; i < count; ++i)
739 buffer[i] = QRgba64::fromArgb32(rgb: clut->at(i: src[i])).premultiplied();
740 return buffer;
741}
742
743static void QT_FASTCALL convertPassThrough(uint *, int, const QVector<QRgb> *)
744{
745}
746
747static const uint *QT_FASTCALL fetchPassThrough(uint *, const uchar *src, int index, int,
748 const QVector<QRgb> *, QDitherInfo *)
749{
750 return reinterpret_cast<const uint *>(src) + index;
751}
752
753static const QRgba64 *QT_FASTCALL fetchPassThrough64(QRgba64 *, const uchar *src, int index, int,
754 const QVector<QRgb> *, QDitherInfo *)
755{
756 return reinterpret_cast<const QRgba64 *>(src) + index;
757}
758
759static void QT_FASTCALL storePassThrough(uchar *dest, const uint *src, int index, int count,
760 const QVector<QRgb> *, QDitherInfo *)
761{
762 uint *d = reinterpret_cast<uint *>(dest) + index;
763 if (d != src)
764 memcpy(dest: d, src: src, n: count * sizeof(uint));
765}
766
767static void QT_FASTCALL convertARGB32ToARGB32PM(uint *buffer, int count, const QVector<QRgb> *)
768{
769 qt_convertARGB32ToARGB32PM(buffer, src: buffer, count);
770}
771
772static const uint *QT_FASTCALL fetchARGB32ToARGB32PM(uint *buffer, const uchar *src, int index, int count,
773 const QVector<QRgb> *, QDitherInfo *)
774{
775 return qt_convertARGB32ToARGB32PM(buffer, src: reinterpret_cast<const uint *>(src) + index, count);
776}
777
778static void QT_FASTCALL convertRGBA8888PMToARGB32PM(uint *buffer, int count, const QVector<QRgb> *)
779{
780 for (int i = 0; i < count; ++i)
781 buffer[i] = RGBA2ARGB(x: buffer[i]);
782}
783
784static const uint *QT_FASTCALL fetchRGBA8888PMToARGB32PM(uint *buffer, const uchar *src, int index, int count,
785 const QVector<QRgb> *, QDitherInfo *)
786{
787 const uint *s = reinterpret_cast<const uint *>(src) + index;
788 UNALIASED_CONVERSION_LOOP(buffer, s, count, RGBA2ARGB);
789 return buffer;
790}
791
792static void QT_FASTCALL convertRGBA8888ToARGB32PM(uint *buffer, int count, const QVector<QRgb> *)
793{
794 qt_convertRGBA8888ToARGB32PM(buffer, src: buffer, count);
795}
796
797static const uint *QT_FASTCALL fetchRGBA8888ToARGB32PM(uint *buffer, const uchar *src, int index, int count,
798 const QVector<QRgb> *, QDitherInfo *)
799{
800 return qt_convertRGBA8888ToARGB32PM(buffer, src: reinterpret_cast<const uint *>(src) + index, count);
801}
802
803static void QT_FASTCALL convertAlpha8ToRGB32(uint *buffer, int count, const QVector<QRgb> *)
804{
805 for (int i = 0; i < count; ++i)
806 buffer[i] = qRgba(r: 0, g: 0, b: 0, a: buffer[i]);
807}
808
809static const uint *QT_FASTCALL fetchAlpha8ToRGB32(uint *buffer, const uchar *src, int index, int count,
810 const QVector<QRgb> *, QDitherInfo *)
811{
812 for (int i = 0; i < count; ++i)
813 buffer[i] = qRgba(r: 0, g: 0, b: 0, a: src[index + i]);
814 return buffer;
815}
816
817static const QRgba64 *QT_FASTCALL convertAlpha8ToRGB64(QRgba64 *buffer, const uint *src, int count,
818 const QVector<QRgb> *, QDitherInfo *)
819{
820 for (int i = 0; i < count; ++i)
821 buffer[i] = QRgba64::fromRgba(red: 0, green: 0, blue: 0, alpha: src[i]);
822 return buffer;
823}
824static const QRgba64 *QT_FASTCALL fetchAlpha8ToRGB64(QRgba64 *buffer, const uchar *src, int index, int count,
825 const QVector<QRgb> *, QDitherInfo *)
826{
827 for (int i = 0; i < count; ++i)
828 buffer[i] = QRgba64::fromRgba(red: 0, green: 0, blue: 0, alpha: src[index + i]);
829 return buffer;
830}
831
832static void QT_FASTCALL convertGrayscale8ToRGB32(uint *buffer, int count, const QVector<QRgb> *)
833{
834 for (int i = 0; i < count; ++i) {
835 const uint s = buffer[i];
836 buffer[i] = qRgb(r: s, g: s, b: s);
837 }
838}
839
840static const uint *QT_FASTCALL fetchGrayscale8ToRGB32(uint *buffer, const uchar *src, int index, int count,
841 const QVector<QRgb> *, QDitherInfo *)
842{
843 for (int i = 0; i < count; ++i) {
844 const uint s = src[index + i];
845 buffer[i] = qRgb(r: s, g: s, b: s);
846 }
847 return buffer;
848}
849
850static const QRgba64 *QT_FASTCALL convertGrayscale8ToRGB64(QRgba64 *buffer, const uint *src, int count,
851 const QVector<QRgb> *, QDitherInfo *)
852{
853 for (int i = 0; i < count; ++i)
854 buffer[i] = QRgba64::fromRgba(red: src[i], green: src[i], blue: src[i], alpha: 255);
855 return buffer;
856}
857
858static const QRgba64 *QT_FASTCALL fetchGrayscale8ToRGB64(QRgba64 *buffer, const uchar *src, int index, int count,
859 const QVector<QRgb> *, QDitherInfo *)
860{
861 for (int i = 0; i < count; ++i) {
862 const uint s = src[index + i];
863 buffer[i] = QRgba64::fromRgba(red: s, green: s, blue: s, alpha: 255);
864 }
865 return buffer;
866}
867
868static void QT_FASTCALL convertGrayscale16ToRGB32(uint *buffer, int count, const QVector<QRgb> *)
869{
870 for (int i = 0; i < count; ++i) {
871 const uint x = qt_div_257(x: buffer[i]);
872 buffer[i] = qRgb(r: x, g: x, b: x);
873 }
874}
875
876static const uint *QT_FASTCALL fetchGrayscale16ToRGB32(uint *buffer, const uchar *src, int index, int count,
877 const QVector<QRgb> *, QDitherInfo *)
878{
879 const unsigned short *s = reinterpret_cast<const unsigned short *>(src) + index;
880 for (int i = 0; i < count; ++i) {
881 const uint x = qt_div_257(x: s[i]);
882 buffer[i] = qRgb(r: x, g: x, b: x);
883 }
884 return buffer;
885}
886
887static const QRgba64 *QT_FASTCALL convertGrayscale16ToRGBA64(QRgba64 *buffer, const uint *src, int count,
888 const QVector<QRgb> *, QDitherInfo *)
889{
890 for (int i = 0; i < count; ++i)
891 buffer[i] = QRgba64::fromRgba64(red: src[i], green: src[i], blue: src[i], alpha: 65535);
892 return buffer;
893}
894
895static const QRgba64 *QT_FASTCALL fetchGrayscale16ToRGBA64(QRgba64 *buffer, const uchar *src, int index, int count,
896 const QVector<QRgb> *, QDitherInfo *)
897{
898 const unsigned short *s = reinterpret_cast<const unsigned short *>(src) + index;
899 for (int i = 0; i < count; ++i) {
900 buffer[i] = QRgba64::fromRgba64(red: s[i], green: s[i], blue: s[i], alpha: 65535);
901 }
902 return buffer;
903}
904
905static void QT_FASTCALL storeARGB32FromARGB32PM(uchar *dest, const uint *src, int index, int count,
906 const QVector<QRgb> *, QDitherInfo *)
907{
908 uint *d = reinterpret_cast<uint *>(dest) + index;
909 UNALIASED_CONVERSION_LOOP(d, src, count, [](uint c) { return qUnpremultiply(c); });
910}
911
912static void QT_FASTCALL storeRGBA8888PMFromARGB32PM(uchar *dest, const uint *src, int index, int count,
913 const QVector<QRgb> *, QDitherInfo *)
914{
915 uint *d = reinterpret_cast<uint *>(dest) + index;
916 UNALIASED_CONVERSION_LOOP(d, src, count, ARGB2RGBA);
917}
918
919#ifdef __SSE2__
920template<bool RGBA, bool maskAlpha>
921static inline void qConvertARGB32PMToRGBA64PM_sse2(QRgba64 *buffer, const uint *src, int count)
922{
923 if (count <= 0)
924 return;
925
926 const __m128i amask = _mm_set1_epi32(i: 0xff000000);
927 int i = 0;
928 for (; ((uintptr_t)buffer & 0xf) && i < count; ++i) {
929 uint s = *src++;
930 if (maskAlpha)
931 s = s | 0xff000000;
932 if (RGBA)
933 s = RGBA2ARGB(x: s);
934 *buffer++ = QRgba64::fromArgb32(rgb: s);
935 }
936 for (; i < count-3; i += 4) {
937 __m128i vs = _mm_loadu_si128(p: (const __m128i*)src);
938 if (maskAlpha)
939 vs = _mm_or_si128(a: vs, b: amask);
940 src += 4;
941 __m128i v1 = _mm_unpacklo_epi8(a: vs, b: vs);
942 __m128i v2 = _mm_unpackhi_epi8(a: vs, b: vs);
943 if (!RGBA) {
944 v1 = _mm_shufflelo_epi16(v1, _MM_SHUFFLE(3, 0, 1, 2));
945 v2 = _mm_shufflelo_epi16(v2, _MM_SHUFFLE(3, 0, 1, 2));
946 v1 = _mm_shufflehi_epi16(v1, _MM_SHUFFLE(3, 0, 1, 2));
947 v2 = _mm_shufflehi_epi16(v2, _MM_SHUFFLE(3, 0, 1, 2));
948 }
949 _mm_store_si128(p: (__m128i*)(buffer), b: v1);
950 buffer += 2;
951 _mm_store_si128(p: (__m128i*)(buffer), b: v2);
952 buffer += 2;
953 }
954
955 SIMD_EPILOGUE(i, count, 3) {
956 uint s = *src++;
957 if (maskAlpha)
958 s = s | 0xff000000;
959 if (RGBA)
960 s = RGBA2ARGB(x: s);
961 *buffer++ = QRgba64::fromArgb32(rgb: s);
962 }
963}
964
965template<QtPixelOrder PixelOrder>
966static inline void qConvertRGBA64PMToA2RGB30PM_sse2(uint *dest, const QRgba64 *buffer, int count)
967{
968 const __m128i gmask = _mm_set1_epi32(i: 0x000ffc00);
969 const __m128i cmask = _mm_set1_epi32(i: 0x000003ff);
970 int i = 0;
971 __m128i vr, vg, vb, va;
972 for (; i < count && uintptr_t(buffer) & 0xF; ++i) {
973 *dest++ = qConvertRgb64ToRgb30<PixelOrder>(*buffer++);
974 }
975
976 for (; i < count-15; i += 16) {
977 // Repremultiplying is really expensive and hard to do in SIMD without AVX2,
978 // so we try to avoid it by checking if it is needed 16 samples at a time.
979 __m128i vOr = _mm_set1_epi32(i: 0);
980 __m128i vAnd = _mm_set1_epi32(i: 0xffffffff);
981 for (int j = 0; j < 16; j += 2) {
982 __m128i vs = _mm_load_si128(p: (const __m128i*)(buffer + j));
983 vOr = _mm_or_si128(a: vOr, b: vs);
984 vAnd = _mm_and_si128(a: vAnd, b: vs);
985 }
986 const quint16 orAlpha = ((uint)_mm_extract_epi16(vOr, 3)) | ((uint)_mm_extract_epi16(vOr, 7));
987 const quint16 andAlpha = ((uint)_mm_extract_epi16(vAnd, 3)) & ((uint)_mm_extract_epi16(vAnd, 7));
988
989 if (andAlpha == 0xffff) {
990 for (int j = 0; j < 16; j += 2) {
991 __m128i vs = _mm_load_si128(p: (const __m128i*)buffer);
992 buffer += 2;
993 vr = _mm_srli_epi64(a: vs, count: 6);
994 vg = _mm_srli_epi64(a: vs, count: 16 + 6 - 10);
995 vb = _mm_srli_epi64(a: vs, count: 32 + 6);
996 vr = _mm_and_si128(a: vr, b: cmask);
997 vg = _mm_and_si128(a: vg, b: gmask);
998 vb = _mm_and_si128(a: vb, b: cmask);
999 va = _mm_srli_epi64(a: vs, count: 48 + 14);
1000 if (PixelOrder == PixelOrderRGB)
1001 vr = _mm_slli_epi32(a: vr, count: 20);
1002 else
1003 vb = _mm_slli_epi32(a: vb, count: 20);
1004 va = _mm_slli_epi32(a: va, count: 30);
1005 __m128i vd = _mm_or_si128(a: _mm_or_si128(a: vr, b: vg), b: _mm_or_si128(a: vb, b: va));
1006 vd = _mm_shuffle_epi32(vd, _MM_SHUFFLE(3, 1, 2, 0));
1007 _mm_storel_epi64(p: (__m128i*)dest, a: vd);
1008 dest += 2;
1009 }
1010 } else if (orAlpha == 0) {
1011 for (int j = 0; j < 16; ++j) {
1012 *dest++ = 0;
1013 buffer++;
1014 }
1015 } else {
1016 for (int j = 0; j < 16; ++j)
1017 *dest++ = qConvertRgb64ToRgb30<PixelOrder>(*buffer++);
1018 }
1019 }
1020
1021 SIMD_EPILOGUE(i, count, 15)
1022 *dest++ = qConvertRgb64ToRgb30<PixelOrder>(*buffer++);
1023}
1024#elif defined(__ARM_NEON__)
1025template<bool RGBA, bool maskAlpha>
1026static inline void qConvertARGB32PMToRGBA64PM_neon(QRgba64 *buffer, const uint *src, int count)
1027{
1028 if (count <= 0)
1029 return;
1030
1031 const uint32x4_t amask = vdupq_n_u32(0xff000000);
1032#if defined(Q_PROCESSOR_ARM_64)
1033 const uint8x16_t rgbaMask = { 2, 1, 0, 3, 6, 5, 4, 7, 10, 9, 8, 11, 14, 13, 12, 15};
1034#else
1035 const uint8x8_t rgbaMask = { 2, 1, 0, 3, 6, 5, 4, 7 };
1036#endif
1037 int i = 0;
1038 for (; i < count-3; i += 4) {
1039 uint32x4_t vs32 = vld1q_u32(src);
1040 src += 4;
1041 if (maskAlpha)
1042 vs32 = vorrq_u32(vs32, amask);
1043 uint8x16_t vs8 = vreinterpretq_u8_u32(vs32);
1044 if (!RGBA) {
1045#if defined(Q_PROCESSOR_ARM_64)
1046 vs8 = vqtbl1q_u8(vs8, rgbaMask);
1047#else
1048 // no vqtbl1q_u8
1049 const uint8x8_t vlo = vtbl1_u8(vget_low_u8(vs8), rgbaMask);
1050 const uint8x8_t vhi = vtbl1_u8(vget_high_u8(vs8), rgbaMask);
1051 vs8 = vcombine_u8(vlo, vhi);
1052#endif
1053 }
1054 uint8x16x2_t v = vzipq_u8(vs8, vs8);
1055
1056 vst1q_u16((uint16_t *)buffer, vreinterpretq_u16_u8(v.val[0]));
1057 buffer += 2;
1058 vst1q_u16((uint16_t *)buffer, vreinterpretq_u16_u8(v.val[1]));
1059 buffer += 2;
1060 }
1061
1062 SIMD_EPILOGUE(i, count, 3) {
1063 uint s = *src++;
1064 if (maskAlpha)
1065 s = s | 0xff000000;
1066 if (RGBA)
1067 s = RGBA2ARGB(s);
1068 *buffer++ = QRgba64::fromArgb32(s);
1069 }
1070}
1071#endif
1072
1073static const QRgba64 *QT_FASTCALL convertRGB32ToRGB64(QRgba64 *buffer, const uint *src, int count,
1074 const QVector<QRgb> *, QDitherInfo *)
1075{
1076#ifdef __SSE2__
1077 qConvertARGB32PMToRGBA64PM_sse2<false, true>(buffer, src, count);
1078#elif defined(__ARM_NEON__)
1079 qConvertARGB32PMToRGBA64PM_neon<false, true>(buffer, src, count);
1080#else
1081 for (int i = 0; i < count; ++i)
1082 buffer[i] = QRgba64::fromArgb32(0xff000000 | src[i]);
1083#endif
1084 return buffer;
1085}
1086
1087static const QRgba64 *QT_FASTCALL fetchRGB32ToRGB64(QRgba64 *buffer, const uchar *src, int index, int count,
1088 const QVector<QRgb> *, QDitherInfo *)
1089{
1090 return convertRGB32ToRGB64(buffer, src: reinterpret_cast<const uint *>(src) + index, count, nullptr, nullptr);
1091}
1092
1093static const QRgba64 *QT_FASTCALL convertARGB32ToRGBA64PM(QRgba64 *buffer, const uint *src, int count,
1094 const QVector<QRgb> *, QDitherInfo *)
1095{
1096 for (int i = 0; i < count; ++i)
1097 buffer[i] = QRgba64::fromArgb32(rgb: src[i]).premultiplied();
1098 return buffer;
1099}
1100
1101static const QRgba64 *QT_FASTCALL fetchARGB32ToRGBA64PM(QRgba64 *buffer, const uchar *src, int index, int count,
1102 const QVector<QRgb> *, QDitherInfo *)
1103{
1104 return convertARGB32ToRGBA64PM(buffer, src: reinterpret_cast<const uint *>(src) + index, count, nullptr, nullptr);
1105}
1106
1107static const QRgba64 *QT_FASTCALL convertARGB32PMToRGBA64PM(QRgba64 *buffer, const uint *src, int count,
1108 const QVector<QRgb> *, QDitherInfo *)
1109{
1110#ifdef __SSE2__
1111 qConvertARGB32PMToRGBA64PM_sse2<false, false>(buffer, src, count);
1112#elif defined(__ARM_NEON__)
1113 qConvertARGB32PMToRGBA64PM_neon<false, false>(buffer, src, count);
1114#else
1115 for (int i = 0; i < count; ++i)
1116 buffer[i] = QRgba64::fromArgb32(src[i]);
1117#endif
1118 return buffer;
1119}
1120
1121static const QRgba64 *QT_FASTCALL fetchARGB32PMToRGBA64PM(QRgba64 *buffer, const uchar *src, int index, int count,
1122 const QVector<QRgb> *, QDitherInfo *)
1123{
1124 return convertARGB32PMToRGBA64PM(buffer, src: reinterpret_cast<const uint *>(src) + index, count, nullptr, nullptr);
1125}
1126
1127#if QT_CONFIG(raster_64bit)
1128static void convertRGBA64ToRGBA64PM(QRgba64 *buffer, int count)
1129{
1130 for (int i = 0; i < count; ++i)
1131 buffer[i] = buffer[i].premultiplied();
1132}
1133
1134static void convertRGBA64PMToRGBA64PM(QRgba64 *, int)
1135{
1136}
1137#endif
1138
1139static const QRgba64 *QT_FASTCALL fetchRGBA64ToRGBA64PM(QRgba64 *buffer, const uchar *src, int index, int count,
1140 const QVector<QRgb> *, QDitherInfo *)
1141{
1142 const QRgba64 *s = reinterpret_cast<const QRgba64 *>(src) + index;
1143 for (int i = 0; i < count; ++i)
1144 buffer[i] = QRgba64::fromRgba64(c: s[i]).premultiplied();
1145 return buffer;
1146}
1147
1148static const QRgba64 *QT_FASTCALL convertRGBA8888ToRGBA64PM(QRgba64 *buffer, const uint *src, int count,
1149 const QVector<QRgb> *, QDitherInfo *)
1150{
1151 for (int i = 0; i < count; ++i)
1152 buffer[i] = QRgba64::fromArgb32(rgb: RGBA2ARGB(x: src[i])).premultiplied();
1153 return buffer;
1154}
1155
1156static const QRgba64 *QT_FASTCALL fetchRGBA8888ToRGBA64PM(QRgba64 *buffer, const uchar *src, int index, int count,
1157 const QVector<QRgb> *, QDitherInfo *)
1158{
1159 return convertRGBA8888ToRGBA64PM(buffer, src: reinterpret_cast<const uint *>(src) + index, count, nullptr, nullptr);
1160}
1161
1162static const QRgba64 *QT_FASTCALL convertRGBA8888PMToRGBA64PM(QRgba64 *buffer, const uint *src, int count,
1163 const QVector<QRgb> *, QDitherInfo *)
1164{
1165#ifdef __SSE2__
1166 qConvertARGB32PMToRGBA64PM_sse2<true, false>(buffer, src, count);
1167#elif defined(__ARM_NEON__)
1168 qConvertARGB32PMToRGBA64PM_neon<true, false>(buffer, src, count);
1169#else
1170 for (int i = 0; i < count; ++i)
1171 buffer[i] = QRgba64::fromArgb32(RGBA2ARGB(src[i]));
1172#endif
1173 return buffer;
1174}
1175
1176static const QRgba64 *QT_FASTCALL fetchRGBA8888PMToRGBA64PM(QRgba64 *buffer, const uchar *src, int index, int count,
1177 const QVector<QRgb> *, QDitherInfo *)
1178{
1179 return convertRGBA8888PMToRGBA64PM(buffer, src: reinterpret_cast<const uint *>(src) + index, count, nullptr, nullptr);
1180}
1181
1182static void QT_FASTCALL storeRGBA8888FromARGB32PM(uchar *dest, const uint *src, int index, int count,
1183 const QVector<QRgb> *, QDitherInfo *)
1184{
1185 uint *d = reinterpret_cast<uint *>(dest) + index;
1186 UNALIASED_CONVERSION_LOOP(d, src, count, [](uint c) { return ARGB2RGBA(qUnpremultiply(c)); });
1187}
1188
1189static void QT_FASTCALL storeRGBXFromRGB32(uchar *dest, const uint *src, int index, int count,
1190 const QVector<QRgb> *, QDitherInfo *)
1191{
1192 uint *d = reinterpret_cast<uint *>(dest) + index;
1193 UNALIASED_CONVERSION_LOOP(d, src, count, [](uint c) { return ARGB2RGBA(0xff000000 | c); });
1194}
1195
1196static void QT_FASTCALL storeRGBXFromARGB32PM(uchar *dest, const uint *src, int index, int count,
1197 const QVector<QRgb> *, QDitherInfo *)
1198{
1199 uint *d = reinterpret_cast<uint *>(dest) + index;
1200 UNALIASED_CONVERSION_LOOP(d, src, count, [](uint c) { return ARGB2RGBA(0xff000000 | qUnpremultiply(c)); });
1201}
1202
1203template<QtPixelOrder PixelOrder>
1204static void QT_FASTCALL convertA2RGB30PMToARGB32PM(uint *buffer, int count, const QVector<QRgb> *)
1205{
1206 for (int i = 0; i < count; ++i)
1207 buffer[i] = qConvertA2rgb30ToArgb32<PixelOrder>(buffer[i]);
1208}
1209
1210template<QtPixelOrder PixelOrder>
1211static const uint *QT_FASTCALL fetchA2RGB30PMToARGB32PM(uint *buffer, const uchar *s, int index, int count,
1212 const QVector<QRgb> *, QDitherInfo *dither)
1213{
1214 const uint *src = reinterpret_cast<const uint *>(s) + index;
1215 if (!dither) {
1216 UNALIASED_CONVERSION_LOOP(buffer, src, count, qConvertA2rgb30ToArgb32<PixelOrder>);
1217 } else {
1218 for (int i = 0; i < count; ++i) {
1219 const uint c = src[i];
1220 short d10 = (qt_bayer_matrix[dither->y & 15][(dither->x + i) & 15] << 2);
1221 short a10 = (c >> 30) * 0x155;
1222 short r10 = ((c >> 20) & 0x3ff);
1223 short g10 = ((c >> 10) & 0x3ff);
1224 short b10 = (c & 0x3ff);
1225 if (PixelOrder == PixelOrderBGR)
1226 std::swap(a&: r10, b&: b10);
1227 short a8 = (a10 + ((d10 - a10) >> 8)) >> 2;
1228 short r8 = (r10 + ((d10 - r10) >> 8)) >> 2;
1229 short g8 = (g10 + ((d10 - g10) >> 8)) >> 2;
1230 short b8 = (b10 + ((d10 - b10) >> 8)) >> 2;
1231 buffer[i] = qRgba(r: r8, g: g8, b: b8, a: a8);
1232 }
1233 }
1234 return buffer;
1235}
1236
1237#ifdef __SSE2__
1238template<QtPixelOrder PixelOrder>
1239static inline void qConvertA2RGB30PMToRGBA64PM_sse2(QRgba64 *buffer, const uint *src, int count)
1240{
1241 if (count <= 0)
1242 return;
1243
1244 const __m128i rmask = _mm_set1_epi32(i: 0x3ff00000);
1245 const __m128i gmask = _mm_set1_epi32(i: 0x000ffc00);
1246 const __m128i bmask = _mm_set1_epi32(i: 0x000003ff);
1247 const __m128i afactor = _mm_set1_epi16(w: 0x5555);
1248 int i = 0;
1249
1250 for (; ((uintptr_t)buffer & 0xf) && i < count; ++i)
1251 *buffer++ = qConvertA2rgb30ToRgb64<PixelOrder>(*src++);
1252
1253 for (; i < count-3; i += 4) {
1254 __m128i vs = _mm_loadu_si128(p: (const __m128i*)src);
1255 src += 4;
1256 __m128i va = _mm_srli_epi32(a: vs, count: 30);
1257 __m128i vr = _mm_and_si128(a: vs, b: rmask);
1258 __m128i vb = _mm_and_si128(a: vs, b: bmask);
1259 __m128i vg = _mm_and_si128(a: vs, b: gmask);
1260 va = _mm_mullo_epi16(a: va, b: afactor);
1261 vr = _mm_or_si128(a: _mm_srli_epi32(a: vr, count: 14), b: _mm_srli_epi32(a: vr, count: 24));
1262 vg = _mm_or_si128(a: _mm_srli_epi32(a: vg, count: 4), b: _mm_srli_epi32(a: vg, count: 14));
1263 vb = _mm_or_si128(a: _mm_slli_epi32(a: vb, count: 6), b: _mm_srli_epi32(a: vb, count: 4));
1264 __m128i vrb;
1265 if (PixelOrder == PixelOrderRGB)
1266 vrb = _mm_or_si128(a: vr, _mm_slli_si128(vb, 2));
1267 else
1268 vrb = _mm_or_si128(a: vb, _mm_slli_si128(vr, 2));
1269 __m128i vga = _mm_or_si128(a: vg, _mm_slli_si128(va, 2));
1270 _mm_store_si128(p: (__m128i*)(buffer), b: _mm_unpacklo_epi16(a: vrb, b: vga));
1271 buffer += 2;
1272 _mm_store_si128(p: (__m128i*)(buffer), b: _mm_unpackhi_epi16(a: vrb, b: vga));
1273 buffer += 2;
1274 }
1275
1276 SIMD_EPILOGUE(i, count, 3)
1277 *buffer++ = qConvertA2rgb30ToRgb64<PixelOrder>(*src++);
1278}
1279#endif
1280
1281template<QtPixelOrder PixelOrder>
1282static const QRgba64 *QT_FASTCALL convertA2RGB30PMToRGBA64PM(QRgba64 *buffer, const uint *src, int count,
1283 const QVector<QRgb> *, QDitherInfo *)
1284{
1285#ifdef __SSE2__
1286 qConvertA2RGB30PMToRGBA64PM_sse2<PixelOrder>(buffer, src, count);
1287#else
1288 for (int i = 0; i < count; ++i)
1289 buffer[i] = qConvertA2rgb30ToRgb64<PixelOrder>(src[i]);
1290#endif
1291 return buffer;
1292}
1293
1294template<QtPixelOrder PixelOrder>
1295static const QRgba64 *QT_FASTCALL fetchA2RGB30PMToRGBA64PM(QRgba64 *buffer, const uchar *src, int index, int count,
1296 const QVector<QRgb> *, QDitherInfo *)
1297{
1298 return convertA2RGB30PMToRGBA64PM<PixelOrder>(buffer, reinterpret_cast<const uint *>(src) + index, count, nullptr, nullptr);
1299}
1300
1301template<QtPixelOrder PixelOrder>
1302static void QT_FASTCALL storeA2RGB30PMFromARGB32PM(uchar *dest, const uint *src, int index, int count,
1303 const QVector<QRgb> *, QDitherInfo *)
1304{
1305 uint *d = reinterpret_cast<uint *>(dest) + index;
1306 UNALIASED_CONVERSION_LOOP(d, src, count, qConvertArgb32ToA2rgb30<PixelOrder>);
1307}
1308
1309template<QtPixelOrder PixelOrder>
1310static void QT_FASTCALL storeRGB30FromRGB32(uchar *dest, const uint *src, int index, int count,
1311 const QVector<QRgb> *, QDitherInfo *)
1312{
1313 uint *d = reinterpret_cast<uint *>(dest) + index;
1314 UNALIASED_CONVERSION_LOOP(d, src, count, qConvertRgb32ToRgb30<PixelOrder>);
1315}
1316
1317template<QtPixelOrder PixelOrder>
1318static void QT_FASTCALL storeRGB30FromARGB32PM(uchar *dest, const uint *src, int index, int count,
1319 const QVector<QRgb> *, QDitherInfo *)
1320{
1321 uint *d = reinterpret_cast<uint *>(dest) + index;
1322 UNALIASED_CONVERSION_LOOP(d, src, count, qConvertRgb32ToRgb30<PixelOrder>);
1323}
1324
1325template<bool RGBA>
1326void qt_convertRGBA64ToARGB32(uint *dst, const QRgba64 *src, int count)
1327{
1328 int i = 0;
1329#ifdef __SSE2__
1330 if (((uintptr_t)dst & 0x7) && count > 0) {
1331 uint s = (*src++).toArgb32();
1332 if (RGBA)
1333 s = ARGB2RGBA(x: s);
1334 *dst++ = s;
1335 i++;
1336 }
1337 const __m128i vhalf = _mm_set1_epi32(i: 0x80);
1338 const __m128i vzero = _mm_setzero_si128();
1339 for (; i < count-1; i += 2) {
1340 __m128i vs = _mm_loadu_si128(p: (const __m128i*)src);
1341 src += 2;
1342 if (!RGBA) {
1343 vs = _mm_shufflelo_epi16(vs, _MM_SHUFFLE(3, 0, 1, 2));
1344 vs = _mm_shufflehi_epi16(vs, _MM_SHUFFLE(3, 0, 1, 2));
1345 }
1346 __m128i v1 = _mm_unpacklo_epi16(a: vs, b: vzero);
1347 __m128i v2 = _mm_unpackhi_epi16(a: vs, b: vzero);
1348 v1 = _mm_add_epi32(a: v1, b: vhalf);
1349 v2 = _mm_add_epi32(a: v2, b: vhalf);
1350 v1 = _mm_sub_epi32(a: v1, b: _mm_srli_epi32(a: v1, count: 8));
1351 v2 = _mm_sub_epi32(a: v2, b: _mm_srli_epi32(a: v2, count: 8));
1352 v1 = _mm_srli_epi32(a: v1, count: 8);
1353 v2 = _mm_srli_epi32(a: v2, count: 8);
1354 v1 = _mm_packs_epi32(a: v1, b: v2);
1355 v1 = _mm_packus_epi16(a: v1, b: vzero);
1356 _mm_storel_epi64(p: (__m128i*)(dst), a: v1);
1357 dst += 2;
1358 }
1359#endif
1360 for (; i < count; i++) {
1361 uint s = (*src++).toArgb32();
1362 if (RGBA)
1363 s = ARGB2RGBA(x: s);
1364 *dst++ = s;
1365 }
1366}
1367template void qt_convertRGBA64ToARGB32<false>(uint *dst, const QRgba64 *src, int count);
1368template void qt_convertRGBA64ToARGB32<true>(uint *dst, const QRgba64 *src, int count);
1369
1370
1371static void QT_FASTCALL storeAlpha8FromARGB32PM(uchar *dest, const uint *src, int index, int count,
1372 const QVector<QRgb> *, QDitherInfo *)
1373{
1374 for (int i = 0; i < count; ++i)
1375 dest[index + i] = qAlpha(rgb: src[i]);
1376}
1377
1378static void QT_FASTCALL storeGrayscale8FromRGB32(uchar *dest, const uint *src, int index, int count,
1379 const QVector<QRgb> *, QDitherInfo *)
1380{
1381 for (int i = 0; i < count; ++i)
1382 dest[index + i] = qGray(rgb: src[i]);
1383}
1384
1385static void QT_FASTCALL storeGrayscale8FromARGB32PM(uchar *dest, const uint *src, int index, int count,
1386 const QVector<QRgb> *, QDitherInfo *)
1387{
1388 for (int i = 0; i < count; ++i)
1389 dest[index + i] = qGray(rgb: qUnpremultiply(p: src[i]));
1390}
1391
1392static void QT_FASTCALL storeGrayscale16FromRGB32(uchar *dest, const uint *src, int index, int count,
1393 const QVector<QRgb> *, QDitherInfo *)
1394{
1395 unsigned short *d = reinterpret_cast<unsigned short *>(dest) + index;
1396 for (int i = 0; i < count; ++i)
1397 d[i] = qGray(rgb: src[i]) * 257;
1398}
1399
1400static void QT_FASTCALL storeGrayscale16FromARGB32PM(uchar *dest, const uint *src, int index, int count,
1401 const QVector<QRgb> *, QDitherInfo *)
1402{
1403 unsigned short *d = reinterpret_cast<unsigned short *>(dest) + index;
1404 for (int i = 0; i < count; ++i)
1405 d[i] = qGray(rgb: qUnpremultiply(p: src[i])) * 257;
1406}
1407
1408static const uint *QT_FASTCALL fetchRGB64ToRGB32(uint *buffer, const uchar *src, int index, int count,
1409 const QVector<QRgb> *, QDitherInfo *)
1410{
1411 const QRgba64 *s = reinterpret_cast<const QRgba64 *>(src) + index;
1412 for (int i = 0; i < count; ++i)
1413 buffer[i] = toArgb32(rgba64: s[i]);
1414 return buffer;
1415}
1416
1417static void QT_FASTCALL storeRGB64FromRGB32(uchar *dest, const uint *src, int index, int count,
1418 const QVector<QRgb> *, QDitherInfo *)
1419{
1420 QRgba64 *d = reinterpret_cast<QRgba64 *>(dest) + index;
1421 for (int i = 0; i < count; ++i)
1422 d[i] = QRgba64::fromArgb32(rgb: src[i] | 0xff000000);
1423}
1424
1425static const uint *QT_FASTCALL fetchRGBA64ToARGB32PM(uint *buffer, const uchar *src, int index, int count,
1426 const QVector<QRgb> *, QDitherInfo *)
1427{
1428 const QRgba64 *s = reinterpret_cast<const QRgba64 *>(src) + index;
1429 for (int i = 0; i < count; ++i)
1430 buffer[i] = toArgb32(rgba64: s[i].premultiplied());
1431 return buffer;
1432}
1433
1434template<bool Mask>
1435static void QT_FASTCALL storeRGBA64FromARGB32PM(uchar *dest, const uint *src, int index, int count,
1436 const QVector<QRgb> *, QDitherInfo *)
1437{
1438 QRgba64 *d = reinterpret_cast<QRgba64 *>(dest) + index;
1439 for (int i = 0; i < count; ++i) {
1440 d[i] = QRgba64::fromArgb32(rgb: src[i]).unpremultiplied();
1441 if (Mask)
1442 d[i].setAlpha(65535);
1443 }
1444}
1445
1446static void QT_FASTCALL storeRGBA64FromARGB32(uchar *dest, const uint *src, int index, int count,
1447 const QVector<QRgb> *, QDitherInfo *)
1448{
1449 QRgba64 *d = reinterpret_cast<QRgba64 *>(dest) + index;
1450 for (int i = 0; i < count; ++i)
1451 d[i] = QRgba64::fromArgb32(rgb: src[i]);
1452}
1453
1454// Note:
1455// convertToArgb32() assumes that no color channel is less than 4 bits.
1456// storeRGBFromARGB32PM() assumes that no color channel is more than 8 bits.
1457// QImage::rgbSwapped() assumes that the red and blue color channels have the same number of bits.
1458QPixelLayout qPixelLayouts[QImage::NImageFormats] = {
1459 { .hasAlphaChannel: false, .premultiplied: false, .bpp: QPixelLayout::BPPNone, .rbSwap: nullptr, .convertToARGB32PM: nullptr, .convertToRGBA64PM: nullptr, .fetchToARGB32PM: nullptr, .fetchToRGBA64PM: nullptr, .storeFromARGB32PM: nullptr, .storeFromRGB32: nullptr }, // Format_Invalid
1460 { .hasAlphaChannel: false, .premultiplied: false, .bpp: QPixelLayout::BPP1MSB, .rbSwap: nullptr,
1461 .convertToARGB32PM: convertIndexedToARGB32PM, .convertToRGBA64PM: convertIndexedToRGBA64PM,
1462 .fetchToARGB32PM: fetchIndexedToARGB32PM<QPixelLayout::BPP1MSB>, .fetchToRGBA64PM: fetchIndexedToRGBA64PM<QPixelLayout::BPP1MSB>,
1463 .storeFromARGB32PM: nullptr, .storeFromRGB32: nullptr }, // Format_Mono
1464 { .hasAlphaChannel: false, .premultiplied: false, .bpp: QPixelLayout::BPP1LSB, .rbSwap: nullptr,
1465 .convertToARGB32PM: convertIndexedToARGB32PM, .convertToRGBA64PM: convertIndexedToRGBA64PM,
1466 .fetchToARGB32PM: fetchIndexedToARGB32PM<QPixelLayout::BPP1LSB>, .fetchToRGBA64PM: fetchIndexedToRGBA64PM<QPixelLayout::BPP1LSB>,
1467 .storeFromARGB32PM: nullptr, .storeFromRGB32: nullptr }, // Format_MonoLSB
1468 { .hasAlphaChannel: false, .premultiplied: false, .bpp: QPixelLayout::BPP8, .rbSwap: nullptr,
1469 .convertToARGB32PM: convertIndexedToARGB32PM, .convertToRGBA64PM: convertIndexedToRGBA64PM,
1470 .fetchToARGB32PM: fetchIndexedToARGB32PM<QPixelLayout::BPP8>, .fetchToRGBA64PM: fetchIndexedToRGBA64PM<QPixelLayout::BPP8>,
1471 .storeFromARGB32PM: nullptr, .storeFromRGB32: nullptr }, // Format_Indexed8
1472 // Technically using convertPassThrough to convert from ARGB32PM to RGB32 is wrong,
1473 // but everywhere this generic conversion would be wrong is currently overloaded.
1474 { .hasAlphaChannel: false, .premultiplied: false, .bpp: QPixelLayout::BPP32, .rbSwap: rbSwap_rgb32, .convertToARGB32PM: convertPassThrough,
1475 .convertToRGBA64PM: convertRGB32ToRGB64, .fetchToARGB32PM: fetchPassThrough, .fetchToRGBA64PM: fetchRGB32ToRGB64, .storeFromARGB32PM: storePassThrough, .storeFromRGB32: storePassThrough }, // Format_RGB32
1476 { .hasAlphaChannel: true, .premultiplied: false, .bpp: QPixelLayout::BPP32, .rbSwap: rbSwap_rgb32, .convertToARGB32PM: convertARGB32ToARGB32PM,
1477 .convertToRGBA64PM: convertARGB32ToRGBA64PM, .fetchToARGB32PM: fetchARGB32ToARGB32PM, .fetchToRGBA64PM: fetchARGB32ToRGBA64PM, .storeFromARGB32PM: storeARGB32FromARGB32PM, .storeFromRGB32: storePassThrough }, // Format_ARGB32
1478 { .hasAlphaChannel: true, .premultiplied: true, .bpp: QPixelLayout::BPP32, .rbSwap: rbSwap_rgb32, .convertToARGB32PM: convertPassThrough,
1479 .convertToRGBA64PM: convertARGB32PMToRGBA64PM, .fetchToARGB32PM: fetchPassThrough, .fetchToRGBA64PM: fetchARGB32PMToRGBA64PM, .storeFromARGB32PM: storePassThrough, .storeFromRGB32: storePassThrough }, // Format_ARGB32_Premultiplied
1480 pixelLayoutRGB<QImage::Format_RGB16>(),
1481 pixelLayoutARGBPM<QImage::Format_ARGB8565_Premultiplied>(),
1482 pixelLayoutRGB<QImage::Format_RGB666>(),
1483 pixelLayoutARGBPM<QImage::Format_ARGB6666_Premultiplied>(),
1484 pixelLayoutRGB<QImage::Format_RGB555>(),
1485 pixelLayoutARGBPM<QImage::Format_ARGB8555_Premultiplied>(),
1486 pixelLayoutRGB<QImage::Format_RGB888>(),
1487 pixelLayoutRGB<QImage::Format_RGB444>(),
1488 pixelLayoutARGBPM<QImage::Format_ARGB4444_Premultiplied>(),
1489 { .hasAlphaChannel: false, .premultiplied: false, .bpp: QPixelLayout::BPP32, .rbSwap: rbSwap<QImage::Format_RGBA8888>, .convertToARGB32PM: convertRGBA8888PMToARGB32PM,
1490 .convertToRGBA64PM: convertRGBA8888PMToRGBA64PM, .fetchToARGB32PM: fetchRGBA8888PMToARGB32PM, .fetchToRGBA64PM: fetchRGBA8888PMToRGBA64PM, .storeFromARGB32PM: storeRGBXFromARGB32PM, .storeFromRGB32: storeRGBXFromRGB32 }, // Format_RGBX8888
1491 { .hasAlphaChannel: true, .premultiplied: false, .bpp: QPixelLayout::BPP32, .rbSwap: rbSwap<QImage::Format_RGBA8888>, .convertToARGB32PM: convertRGBA8888ToARGB32PM,
1492 .convertToRGBA64PM: convertRGBA8888ToRGBA64PM, .fetchToARGB32PM: fetchRGBA8888ToARGB32PM, .fetchToRGBA64PM: fetchRGBA8888ToRGBA64PM, .storeFromARGB32PM: storeRGBA8888FromARGB32PM, .storeFromRGB32: storeRGBXFromRGB32 }, // Format_RGBA8888
1493 { .hasAlphaChannel: true, .premultiplied: true, .bpp: QPixelLayout::BPP32, .rbSwap: rbSwap<QImage::Format_RGBA8888>, .convertToARGB32PM: convertRGBA8888PMToARGB32PM,
1494 .convertToRGBA64PM: convertRGBA8888PMToRGBA64PM, .fetchToARGB32PM: fetchRGBA8888PMToARGB32PM, .fetchToRGBA64PM: fetchRGBA8888PMToRGBA64PM, .storeFromARGB32PM: storeRGBA8888PMFromARGB32PM, .storeFromRGB32: storeRGBXFromRGB32 }, // Format_RGBA8888_Premultiplied
1495 { .hasAlphaChannel: false, .premultiplied: false, .bpp: QPixelLayout::BPP32, .rbSwap: rbSwap_rgb30,
1496 .convertToARGB32PM: convertA2RGB30PMToARGB32PM<PixelOrderBGR>,
1497 .convertToRGBA64PM: convertA2RGB30PMToRGBA64PM<PixelOrderBGR>,
1498 .fetchToARGB32PM: fetchA2RGB30PMToARGB32PM<PixelOrderBGR>,
1499 .fetchToRGBA64PM: fetchA2RGB30PMToRGBA64PM<PixelOrderBGR>,
1500 .storeFromARGB32PM: storeRGB30FromARGB32PM<PixelOrderBGR>,
1501 .storeFromRGB32: storeRGB30FromRGB32<PixelOrderBGR>
1502 }, // Format_BGR30
1503 { .hasAlphaChannel: true, .premultiplied: true, .bpp: QPixelLayout::BPP32, .rbSwap: rbSwap_rgb30,
1504 .convertToARGB32PM: convertA2RGB30PMToARGB32PM<PixelOrderBGR>,
1505 .convertToRGBA64PM: convertA2RGB30PMToRGBA64PM<PixelOrderBGR>,
1506 .fetchToARGB32PM: fetchA2RGB30PMToARGB32PM<PixelOrderBGR>,
1507 .fetchToRGBA64PM: fetchA2RGB30PMToRGBA64PM<PixelOrderBGR>,
1508 .storeFromARGB32PM: storeA2RGB30PMFromARGB32PM<PixelOrderBGR>,
1509 .storeFromRGB32: storeRGB30FromRGB32<PixelOrderBGR>
1510 }, // Format_A2BGR30_Premultiplied
1511 { .hasAlphaChannel: false, .premultiplied: false, .bpp: QPixelLayout::BPP32, .rbSwap: rbSwap_rgb30,
1512 .convertToARGB32PM: convertA2RGB30PMToARGB32PM<PixelOrderRGB>,
1513 .convertToRGBA64PM: convertA2RGB30PMToRGBA64PM<PixelOrderRGB>,
1514 .fetchToARGB32PM: fetchA2RGB30PMToARGB32PM<PixelOrderRGB>,
1515 .fetchToRGBA64PM: fetchA2RGB30PMToRGBA64PM<PixelOrderRGB>,
1516 .storeFromARGB32PM: storeRGB30FromARGB32PM<PixelOrderRGB>,
1517 .storeFromRGB32: storeRGB30FromRGB32<PixelOrderRGB>
1518 }, // Format_RGB30
1519 { .hasAlphaChannel: true, .premultiplied: true, .bpp: QPixelLayout::BPP32, .rbSwap: rbSwap_rgb30,
1520 .convertToARGB32PM: convertA2RGB30PMToARGB32PM<PixelOrderRGB>,
1521 .convertToRGBA64PM: convertA2RGB30PMToRGBA64PM<PixelOrderRGB>,
1522 .fetchToARGB32PM: fetchA2RGB30PMToARGB32PM<PixelOrderRGB>,
1523 .fetchToRGBA64PM: fetchA2RGB30PMToRGBA64PM<PixelOrderRGB>,
1524 .storeFromARGB32PM: storeA2RGB30PMFromARGB32PM<PixelOrderRGB>,
1525 .storeFromRGB32: storeRGB30FromRGB32<PixelOrderRGB>
1526 }, // Format_A2RGB30_Premultiplied
1527 { .hasAlphaChannel: true, .premultiplied: true, .bpp: QPixelLayout::BPP8, .rbSwap: nullptr,
1528 .convertToARGB32PM: convertAlpha8ToRGB32, .convertToRGBA64PM: convertAlpha8ToRGB64,
1529 .fetchToARGB32PM: fetchAlpha8ToRGB32, .fetchToRGBA64PM: fetchAlpha8ToRGB64,
1530 .storeFromARGB32PM: storeAlpha8FromARGB32PM, .storeFromRGB32: nullptr }, // Format_Alpha8
1531 { .hasAlphaChannel: false, .premultiplied: false, .bpp: QPixelLayout::BPP8, .rbSwap: nullptr,
1532 .convertToARGB32PM: convertGrayscale8ToRGB32, .convertToRGBA64PM: convertGrayscale8ToRGB64,
1533 .fetchToARGB32PM: fetchGrayscale8ToRGB32, .fetchToRGBA64PM: fetchGrayscale8ToRGB64,
1534 .storeFromARGB32PM: storeGrayscale8FromARGB32PM, .storeFromRGB32: storeGrayscale8FromRGB32 }, // Format_Grayscale8
1535 { .hasAlphaChannel: false, .premultiplied: false, .bpp: QPixelLayout::BPP64, .rbSwap: nullptr,
1536 .convertToARGB32PM: convertPassThrough, .convertToRGBA64PM: nullptr,
1537 .fetchToARGB32PM: fetchRGB64ToRGB32, .fetchToRGBA64PM: fetchPassThrough64,
1538 .storeFromARGB32PM: storeRGBA64FromARGB32PM<true>, .storeFromRGB32: storeRGB64FromRGB32 }, // Format_RGBX64
1539 { .hasAlphaChannel: true, .premultiplied: false, .bpp: QPixelLayout::BPP64, .rbSwap: nullptr,
1540 .convertToARGB32PM: convertARGB32ToARGB32PM, .convertToRGBA64PM: nullptr,
1541 .fetchToARGB32PM: fetchRGBA64ToARGB32PM, .fetchToRGBA64PM: fetchRGBA64ToRGBA64PM,
1542 .storeFromARGB32PM: storeRGBA64FromARGB32PM<false>, .storeFromRGB32: storeRGB64FromRGB32 }, // Format_RGBA64
1543 { .hasAlphaChannel: true, .premultiplied: true, .bpp: QPixelLayout::BPP64, .rbSwap: nullptr,
1544 .convertToARGB32PM: convertPassThrough, .convertToRGBA64PM: nullptr,
1545 .fetchToARGB32PM: fetchRGB64ToRGB32, .fetchToRGBA64PM: fetchPassThrough64,
1546 .storeFromARGB32PM: storeRGBA64FromARGB32, .storeFromRGB32: storeRGB64FromRGB32 }, // Format_RGBA64_Premultiplied
1547 { .hasAlphaChannel: false, .premultiplied: false, .bpp: QPixelLayout::BPP16, .rbSwap: nullptr,
1548 .convertToARGB32PM: convertGrayscale16ToRGB32, .convertToRGBA64PM: convertGrayscale16ToRGBA64,
1549 .fetchToARGB32PM: fetchGrayscale16ToRGB32, .fetchToRGBA64PM: fetchGrayscale16ToRGBA64,
1550 .storeFromARGB32PM: storeGrayscale16FromARGB32PM, .storeFromRGB32: storeGrayscale16FromRGB32 }, // Format_Grayscale16
1551 pixelLayoutRGB<QImage::Format_BGR888>(),
1552};
1553
// Compile-time check: the number of elements in qPixelLayouts must equal
// QImage::NImageFormats, so adding a format without a layout entry fails to build.
1554Q_STATIC_ASSERT(sizeof(qPixelLayouts) / sizeof(*qPixelLayouts) == QImage::NImageFormats);
1555
1556static void QT_FASTCALL convertFromRgb64(uint *dest, const QRgba64 *src, int length)
1557{
1558 for (int i = 0; i < length; ++i) {
1559 dest[i] = toArgb32(rgba64: src[i]);
1560 }
1561}
1562
1563template<QImage::Format format>
1564static void QT_FASTCALL storeGenericFromRGBA64PM(uchar *dest, const QRgba64 *src, int index, int count,
1565 const QVector<QRgb> *clut, QDitherInfo *dither)
1566{
1567 uint buffer[BufferSize];
1568 convertFromRgb64(dest: buffer, src, length: count);
1569 qPixelLayouts[format].storeFromARGB32PM(dest, buffer, index, count, clut, dither);
1570}
1571
1572static void QT_FASTCALL storeARGB32FromRGBA64PM(uchar *dest, const QRgba64 *src, int index, int count,
1573 const QVector<QRgb> *, QDitherInfo *)
1574{
1575 uint *d = (uint*)dest + index;
1576 for (int i = 0; i < count; ++i)
1577 d[i] = toArgb32(rgba64: src[i].unpremultiplied());
1578}
1579
1580static void QT_FASTCALL storeRGBA8888FromRGBA64PM(uchar *dest, const QRgba64 *src, int index, int count,
1581 const QVector<QRgb> *, QDitherInfo *)
1582{
1583 uint *d = (uint*)dest + index;
1584 for (int i = 0; i < count; ++i)
1585 d[i] = toRgba8888(rgba64: src[i].unpremultiplied());
1586}
1587
1588template<QtPixelOrder PixelOrder>
1589static void QT_FASTCALL storeRGB30FromRGBA64PM(uchar *dest, const QRgba64 *src, int index, int count,
1590 const QVector<QRgb> *, QDitherInfo *)
1591{
1592 uint *d = (uint*)dest + index;
1593#ifdef __SSE2__
1594 qConvertRGBA64PMToA2RGB30PM_sse2<PixelOrder>(d, src, count);
1595#else
1596 for (int i = 0; i < count; ++i)
1597 d[i] = qConvertRgb64ToRgb30<PixelOrder>(src[i]);
1598#endif
1599}
1600
1601static void QT_FASTCALL storeRGBX64FromRGBA64PM(uchar *dest, const QRgba64 *src, int index, int count,
1602 const QVector<QRgb> *, QDitherInfo *)
1603{
1604 QRgba64 *d = reinterpret_cast<QRgba64*>(dest) + index;
1605 for (int i = 0; i < count; ++i) {
1606 d[i] = src[i].unpremultiplied();
1607 d[i].setAlpha(65535);
1608 }
1609}
1610
1611static void QT_FASTCALL storeRGBA64FromRGBA64PM(uchar *dest, const QRgba64 *src, int index, int count,
1612 const QVector<QRgb> *, QDitherInfo *)
1613{
1614 QRgba64 *d = reinterpret_cast<QRgba64*>(dest) + index;
1615 for (int i = 0; i < count; ++i)
1616 d[i] = src[i].unpremultiplied();
1617}
1618
1619static void QT_FASTCALL storeRGBA64PMFromRGBA64PM(uchar *dest, const QRgba64 *src, int index, int count,
1620 const QVector<QRgb> *, QDitherInfo *)
1621{
1622 QRgba64 *d = reinterpret_cast<QRgba64*>(dest) + index;
1623 if (d != src)
1624 memcpy(dest: d, src: src, n: count * sizeof(QRgba64));
1625}
1626
1627static void QT_FASTCALL storeGray16FromRGBA64PM(uchar *dest, const QRgba64 *src, int index, int count,
1628 const QVector<QRgb> *, QDitherInfo *)
1629{
1630 quint16 *d = reinterpret_cast<quint16*>(dest) + index;
1631 for (int i = 0; i < count; ++i) {
1632 QRgba64 s = src[i].unpremultiplied();
1633 d[i] = qGray(r: s.red(), g: s.green(), b: s.blue());
1634 }
1635}
1636
// Store routines that take premultiplied QRgba64 input, one slot per image
// format (the table is indexed with rasterBuffer->format in destStore64()).
// The first four slots have no routine. Formats without a dedicated 64-bit
// store use storeGenericFromRGBA64PM<>, which goes through a 32-bit buffer.
// The per-entry format names below follow the slot order used by the other
// per-format tables in this file.
1637ConvertAndStorePixelsFunc64 qStoreFromRGBA64PM[QImage::NImageFormats] = {
1638 nullptr, // Format_Invalid
1639 nullptr, // Format_Mono
1640 nullptr, // Format_MonoLSB
1641 nullptr, // Format_Indexed8
1642 storeGenericFromRGBA64PM<QImage::Format_RGB32>,
1643 storeARGB32FromRGBA64PM, // Format_ARGB32
1644 storeGenericFromRGBA64PM<QImage::Format_ARGB32_Premultiplied>,
1645 storeGenericFromRGBA64PM<QImage::Format_RGB16>,
1646 storeGenericFromRGBA64PM<QImage::Format_ARGB8565_Premultiplied>,
1647 storeGenericFromRGBA64PM<QImage::Format_RGB666>,
1648 storeGenericFromRGBA64PM<QImage::Format_ARGB6666_Premultiplied>,
1649 storeGenericFromRGBA64PM<QImage::Format_RGB555>,
1650 storeGenericFromRGBA64PM<QImage::Format_ARGB8555_Premultiplied>,
1651 storeGenericFromRGBA64PM<QImage::Format_RGB888>,
1652 storeGenericFromRGBA64PM<QImage::Format_RGB444>,
1653 storeGenericFromRGBA64PM<QImage::Format_ARGB4444_Premultiplied>,
1654 storeGenericFromRGBA64PM<QImage::Format_RGBX8888>,
1655 storeRGBA8888FromRGBA64PM, // Format_RGBA8888
1656 storeGenericFromRGBA64PM<QImage::Format_RGBA8888_Premultiplied>,
1657 storeRGB30FromRGBA64PM<PixelOrderBGR>, // Format_BGR30
1658 storeRGB30FromRGBA64PM<PixelOrderBGR>, // Format_A2BGR30_Premultiplied
1659 storeRGB30FromRGBA64PM<PixelOrderRGB>, // Format_RGB30
1660 storeRGB30FromRGBA64PM<PixelOrderRGB>, // Format_A2RGB30_Premultiplied
1661 storeGenericFromRGBA64PM<QImage::Format_Alpha8>,
1662 storeGenericFromRGBA64PM<QImage::Format_Grayscale8>,
1663 storeRGBX64FromRGBA64PM, // Format_RGBX64
1664 storeRGBA64FromRGBA64PM, // Format_RGBA64
1665 storeRGBA64PMFromRGBA64PM, // Format_RGBA64_Premultiplied
1666 storeGray16FromRGBA64PM, // Format_Grayscale16
1667 storeGenericFromRGBA64PM<QImage::Format_BGR888>,
1668};
1669
1670/*
1671 Destination fetch. This is simple as we don't have to do bounds checks or
1672 transformations
1673*/
1674
1675static uint * QT_FASTCALL destFetchMono(uint *buffer, QRasterBuffer *rasterBuffer, int x, int y, int length)
1676{
1677 uchar *Q_DECL_RESTRICT data = (uchar *)rasterBuffer->scanLine(y);
1678 uint *start = buffer;
1679 const uint *end = buffer + length;
1680 while (buffer < end) {
1681 *buffer = data[x>>3] & (0x80 >> (x & 7)) ? rasterBuffer->destColor1 : rasterBuffer->destColor0;
1682 ++buffer;
1683 ++x;
1684 }
1685 return start;
1686}
1687
1688static uint * QT_FASTCALL destFetchMonoLsb(uint *buffer, QRasterBuffer *rasterBuffer, int x, int y, int length)
1689{
1690 uchar *Q_DECL_RESTRICT data = (uchar *)rasterBuffer->scanLine(y);
1691 uint *start = buffer;
1692 const uint *end = buffer + length;
1693 while (buffer < end) {
1694 *buffer = data[x>>3] & (0x1 << (x & 7)) ? rasterBuffer->destColor1 : rasterBuffer->destColor0;
1695 ++buffer;
1696 ++x;
1697 }
1698 return start;
1699}
1700
1701static uint * QT_FASTCALL destFetchARGB32P(uint *, QRasterBuffer *rasterBuffer, int x, int y, int)
1702{
1703 return (uint *)rasterBuffer->scanLine(y) + x;
1704}
1705
1706static uint * QT_FASTCALL destFetchRGB16(uint *buffer, QRasterBuffer *rasterBuffer, int x, int y, int length)
1707{
1708 const ushort *Q_DECL_RESTRICT data = (const ushort *)rasterBuffer->scanLine(y) + x;
1709 for (int i = 0; i < length; ++i)
1710 buffer[i] = qConvertRgb16To32(c: data[i]);
1711 return buffer;
1712}
1713
1714static uint *QT_FASTCALL destFetch(uint *buffer, QRasterBuffer *rasterBuffer, int x, int y, int length)
1715{
1716 const QPixelLayout *layout = &qPixelLayouts[rasterBuffer->format];
1717 return const_cast<uint *>(layout->fetchToARGB32PM(buffer, rasterBuffer->scanLine(y), x, length, nullptr, nullptr));
1718}
1719
1720static uint *QT_FASTCALL destFetchUndefined(uint *buffer, QRasterBuffer *, int, int, int)
1721{
1722 return buffer;
1723}
1724
// 32-bit destination fetch routine for each image format (one slot per
// format, in the order given by the per-entry comments).
//  - Format_Invalid and Format_Indexed8 have no routine (nullptr).
//  - Format_RGB32 and Format_ARGB32_Premultiplied use destFetchARGB32P, which
//    returns a pointer into the scanline instead of copying.
//  - Mono, MonoLSB and RGB16 have dedicated converters.
//  - Every other format goes through the layout-driven destFetch().
1725static DestFetchProc destFetchProc[QImage::NImageFormats] =
1726{
1727 nullptr, // Format_Invalid
1728 destFetchMono, // Format_Mono,
1729 destFetchMonoLsb, // Format_MonoLSB
1730 nullptr, // Format_Indexed8
1731 destFetchARGB32P, // Format_RGB32
1732 destFetch, // Format_ARGB32,
1733 destFetchARGB32P, // Format_ARGB32_Premultiplied
1734 destFetchRGB16, // Format_RGB16
1735 destFetch, // Format_ARGB8565_Premultiplied
1736 destFetch, // Format_RGB666
1737 destFetch, // Format_ARGB6666_Premultiplied
1738 destFetch, // Format_RGB555
1739 destFetch, // Format_ARGB8555_Premultiplied
1740 destFetch, // Format_RGB888
1741 destFetch, // Format_RGB444
1742 destFetch, // Format_ARGB4444_Premultiplied
1743 destFetch, // Format_RGBX8888
1744 destFetch, // Format_RGBA8888
1745 destFetch, // Format_RGBA8888_Premultiplied
1746 destFetch, // Format_BGR30
1747 destFetch, // Format_A2BGR30_Premultiplied
1748 destFetch, // Format_RGB30
1749 destFetch, // Format_A2RGB30_Premultiplied
1750 destFetch, // Format_Alpha8
1751 destFetch, // Format_Grayscale8
1752 destFetch, // Format_RGBX64
1753 destFetch, // Format_RGBA64
1754 destFetch, // Format_RGBA64_Premultiplied
1755 destFetch, // Format_Grayscale16
1756 destFetch, // Format_BGR888
1757};
1758
1759#if QT_CONFIG(raster_64bit)
1760static QRgba64 *QT_FASTCALL destFetch64(QRgba64 *buffer, QRasterBuffer *rasterBuffer, int x, int y, int length)
1761{
1762 const QPixelLayout *layout = &qPixelLayouts[rasterBuffer->format];
1763 return const_cast<QRgba64 *>(layout->fetchToRGBA64PM(buffer, rasterBuffer->scanLine(y), x, length, nullptr, nullptr));
1764}
1765
1766static QRgba64 * QT_FASTCALL destFetchRGB64(QRgba64 *, QRasterBuffer *rasterBuffer, int x, int y, int)
1767{
1768 return (QRgba64 *)rasterBuffer->scanLine(y) + x;
1769}
1770
1771static QRgba64 * QT_FASTCALL destFetch64Undefined(QRgba64 *buffer, QRasterBuffer *, int, int, int)
1772{
1773 return buffer;
1774}
1775
// 64-bit (QRgba64) destination fetch routine for each image format (one slot
// per format, in the order given by the per-entry comments).
//  - Invalid, Mono, MonoLSB and Indexed8 have no routine (nullptr).
//  - Format_RGBX64 and Format_RGBA64_Premultiplied use destFetchRGB64, which
//    returns a pointer into the scanline instead of copying.
//  - Every other format goes through the layout-driven destFetch64().
1776static DestFetchProc64 destFetchProc64[QImage::NImageFormats] =
1777{
1778 nullptr, // Format_Invalid
1779 nullptr, // Format_Mono,
1780 nullptr, // Format_MonoLSB
1781 nullptr, // Format_Indexed8
1782 destFetch64, // Format_RGB32
1783 destFetch64, // Format_ARGB32,
1784 destFetch64, // Format_ARGB32_Premultiplied
1785 destFetch64, // Format_RGB16
1786 destFetch64, // Format_ARGB8565_Premultiplied
1787 destFetch64, // Format_RGB666
1788 destFetch64, // Format_ARGB6666_Premultiplied
1789 destFetch64, // Format_RGB555
1790 destFetch64, // Format_ARGB8555_Premultiplied
1791 destFetch64, // Format_RGB888
1792 destFetch64, // Format_RGB444
1793 destFetch64, // Format_ARGB4444_Premultiplied
1794 destFetch64, // Format_RGBX8888
1795 destFetch64, // Format_RGBA8888
1796 destFetch64, // Format_RGBA8888_Premultiplied
1797 destFetch64, // Format_BGR30
1798 destFetch64, // Format_A2BGR30_Premultiplied
1799 destFetch64, // Format_RGB30
1800 destFetch64, // Format_A2RGB30_Premultiplied
1801 destFetch64, // Format_Alpha8
1802 destFetch64, // Format_Grayscale8
1803 destFetchRGB64, // Format_RGBX64
1804 destFetch64, // Format_RGBA64
1805 destFetchRGB64, // Format_RGBA64_Premultiplied
1806 destFetch64, // Format_Grayscale16
1807 destFetch64, // Format_BGR888
1808};
1809#endif
1810
1811/*
1812 Returns the color in the mono destination color table
1813 that is the "nearest" to /color/.
1814*/
1815static inline QRgb findNearestColor(QRgb color, QRasterBuffer *rbuf)
1816{
1817 QRgb color_0 = qPremultiply(x: rbuf->destColor0);
1818 QRgb color_1 = qPremultiply(x: rbuf->destColor1);
1819 color = qPremultiply(x: color);
1820
1821 int r = qRed(rgb: color);
1822 int g = qGreen(rgb: color);
1823 int b = qBlue(rgb: color);
1824 int rx, gx, bx;
1825 int dist_0, dist_1;
1826
1827 rx = r - qRed(rgb: color_0);
1828 gx = g - qGreen(rgb: color_0);
1829 bx = b - qBlue(rgb: color_0);
1830 dist_0 = rx*rx + gx*gx + bx*bx;
1831
1832 rx = r - qRed(rgb: color_1);
1833 gx = g - qGreen(rgb: color_1);
1834 bx = b - qBlue(rgb: color_1);
1835 dist_1 = rx*rx + gx*gx + bx*bx;
1836
1837 if (dist_0 < dist_1)
1838 return color_0;
1839 return color_1;
1840}
1841
1842/*
1843 Destination store.
1844*/
1845
1846static void QT_FASTCALL destStoreMono(QRasterBuffer *rasterBuffer, int x, int y, const uint *buffer, int length)
1847{
1848 uchar *Q_DECL_RESTRICT data = (uchar *)rasterBuffer->scanLine(y);
1849 if (rasterBuffer->monoDestinationWithClut) {
1850 for (int i = 0; i < length; ++i) {
1851 if (buffer[i] == rasterBuffer->destColor0) {
1852 data[x >> 3] &= ~(0x80 >> (x & 7));
1853 } else if (buffer[i] == rasterBuffer->destColor1) {
1854 data[x >> 3] |= 0x80 >> (x & 7);
1855 } else if (findNearestColor(color: buffer[i], rbuf: rasterBuffer) == rasterBuffer->destColor0) {
1856 data[x >> 3] &= ~(0x80 >> (x & 7));
1857 } else {
1858 data[x >> 3] |= 0x80 >> (x & 7);
1859 }
1860 ++x;
1861 }
1862 } else {
1863 for (int i = 0; i < length; ++i) {
1864 if (qGray(rgb: buffer[i]) < int(qt_bayer_matrix[y & 15][x & 15]))
1865 data[x >> 3] |= 0x80 >> (x & 7);
1866 else
1867 data[x >> 3] &= ~(0x80 >> (x & 7));
1868 ++x;
1869 }
1870 }
1871}
1872
1873static void QT_FASTCALL destStoreMonoLsb(QRasterBuffer *rasterBuffer, int x, int y, const uint *buffer, int length)
1874{
1875 uchar *Q_DECL_RESTRICT data = (uchar *)rasterBuffer->scanLine(y);
1876 if (rasterBuffer->monoDestinationWithClut) {
1877 for (int i = 0; i < length; ++i) {
1878 if (buffer[i] == rasterBuffer->destColor0) {
1879 data[x >> 3] &= ~(1 << (x & 7));
1880 } else if (buffer[i] == rasterBuffer->destColor1) {
1881 data[x >> 3] |= 1 << (x & 7);
1882 } else if (findNearestColor(color: buffer[i], rbuf: rasterBuffer) == rasterBuffer->destColor0) {
1883 data[x >> 3] &= ~(1 << (x & 7));
1884 } else {
1885 data[x >> 3] |= 1 << (x & 7);
1886 }
1887 ++x;
1888 }
1889 } else {
1890 for (int i = 0; i < length; ++i) {
1891 if (qGray(rgb: buffer[i]) < int(qt_bayer_matrix[y & 15][x & 15]))
1892 data[x >> 3] |= 1 << (x & 7);
1893 else
1894 data[x >> 3] &= ~(1 << (x & 7));
1895 ++x;
1896 }
1897 }
1898}
1899
1900static void QT_FASTCALL destStoreRGB16(QRasterBuffer *rasterBuffer, int x, int y, const uint *buffer, int length)
1901{
1902 quint16 *data = (quint16*)rasterBuffer->scanLine(y) + x;
1903 for (int i = 0; i < length; ++i)
1904 data[i] = qConvertRgb32To16(c: buffer[i]);
1905}
1906
1907static void QT_FASTCALL destStore(QRasterBuffer *rasterBuffer, int x, int y, const uint *buffer, int length)
1908{
1909 const QPixelLayout *layout = &qPixelLayouts[rasterBuffer->format];
1910 ConvertAndStorePixelsFunc store = layout->storeFromARGB32PM;
1911 if (!layout->premultiplied && !layout->hasAlphaChannel)
1912 store = layout->storeFromRGB32;
1913 uchar *dest = rasterBuffer->scanLine(y);
1914 store(dest, buffer, x, length, nullptr, nullptr);
1915}
1916
// 32-bit destination store routine for each image format (one slot per
// format, in the order given by the per-entry comments).
//  - Format_Invalid, Format_Indexed8, Format_RGB32 and
//    Format_ARGB32_Premultiplied have no routine (nullptr).
//  - Mono, MonoLSB and RGB16 have dedicated writers.
//  - Every other format goes through the layout-driven destStore().
1917static DestStoreProc destStoreProc[QImage::NImageFormats] =
1918{
1919 nullptr, // Format_Invalid
1920 destStoreMono, // Format_Mono,
1921 destStoreMonoLsb, // Format_MonoLSB
1922 nullptr, // Format_Indexed8
1923 nullptr, // Format_RGB32
1924 destStore, // Format_ARGB32,
1925 nullptr, // Format_ARGB32_Premultiplied
1926 destStoreRGB16, // Format_RGB16
1927 destStore, // Format_ARGB8565_Premultiplied
1928 destStore, // Format_RGB666
1929 destStore, // Format_ARGB6666_Premultiplied
1930 destStore, // Format_RGB555
1931 destStore, // Format_ARGB8555_Premultiplied
1932 destStore, // Format_RGB888
1933 destStore, // Format_RGB444
1934 destStore, // Format_ARGB4444_Premultiplied
1935 destStore, // Format_RGBX8888
1936 destStore, // Format_RGBA8888
1937 destStore, // Format_RGBA8888_Premultiplied
1938 destStore, // Format_BGR30
1939 destStore, // Format_A2BGR30_Premultiplied
1940 destStore, // Format_RGB30
1941 destStore, // Format_A2RGB30_Premultiplied
1942 destStore, // Format_Alpha8
1943 destStore, // Format_Grayscale8
1944 destStore, // Format_RGBX64
1945 destStore, // Format_RGBA64
1946 destStore, // Format_RGBA64_Premultiplied
1947 destStore, // Format_Grayscale16
1948 destStore, // Format_BGR888
1949};
1950
1951#if QT_CONFIG(raster_64bit)
1952static void QT_FASTCALL destStore64(QRasterBuffer *rasterBuffer, int x, int y, const QRgba64 *buffer, int length)
1953{
1954 auto store = qStoreFromRGBA64PM[rasterBuffer->format];
1955 uchar *dest = rasterBuffer->scanLine(y);
1956 store(dest, buffer, x, length, nullptr, nullptr);
1957}
1958
1959static void QT_FASTCALL destStore64RGBA64(QRasterBuffer *rasterBuffer, int x, int y, const QRgba64 *buffer, int length)
1960{
1961 QRgba64 *dest = reinterpret_cast<QRgba64*>(rasterBuffer->scanLine(y)) + x;
1962 for (int i = 0; i < length; ++i) {
1963 dest[i] = buffer[i].unpremultiplied();
1964 }
1965}
1966
// 64-bit (QRgba64) destination store routine for each image format (one slot
// per format, in the order given by the per-entry comments).
//  - Invalid, Mono, MonoLSB, Indexed8, Format_RGBX64 and
//    Format_RGBA64_Premultiplied have no routine (nullptr).
//  - Format_RGBA64 uses destStore64RGBA64, which unpremultiplies in place.
//  - Every other format goes through destStore64() and qStoreFromRGBA64PM.
1967static DestStoreProc64 destStoreProc64[QImage::NImageFormats] =
1968{
1969 nullptr, // Format_Invalid
1970 nullptr, // Format_Mono,
1971 nullptr, // Format_MonoLSB
1972 nullptr, // Format_Indexed8
1973 destStore64, // Format_RGB32
1974 destStore64, // Format_ARGB32,
1975 destStore64, // Format_ARGB32_Premultiplied
1976 destStore64, // Format_RGB16
1977 destStore64, // Format_ARGB8565_Premultiplied
1978 destStore64, // Format_RGB666
1979 destStore64, // Format_ARGB6666_Premultiplied
1980 destStore64, // Format_RGB555
1981 destStore64, // Format_ARGB8555_Premultiplied
1982 destStore64, // Format_RGB888
1983 destStore64, // Format_RGB444
1984 destStore64, // Format_ARGB4444_Premultiplied
1985 destStore64, // Format_RGBX8888
1986 destStore64, // Format_RGBA8888
1987 destStore64, // Format_RGBA8888_Premultiplied
1988 destStore64, // Format_BGR30
1989 destStore64, // Format_A2BGR30_Premultiplied
1990 destStore64, // Format_RGB30
1991 destStore64, // Format_A2RGB30_Premultiplied
1992 destStore64, // Format_Alpha8
1993 destStore64, // Format_Grayscale8
1994 nullptr, // Format_RGBX64
1995 destStore64RGBA64, // Format_RGBA64
1996 nullptr, // Format_RGBA64_Premultiplied
1997 destStore64, // Format_Grayscale16
1998 destStore64, // Format_BGR888
1999};
2000#endif
2001
2002/*
2003 Source fetches
2004
2005 This is a bit more complicated, as we need several fetch routines for every surface type
2006
2007 We need 5 fetch methods per surface type:
2008 untransformed
2009 transformed (tiled and not tiled)
2010 transformed bilinear (tiled and not tiled)
2011
2012 We don't need bounds checks for untransformed, but we need them for the other ones.
2013
2014 The generic implementation does pixel by pixel fetches
2015*/
2016
// Selects the source-fetch variant used for a texture. The transformed
// fetchers in this file are templated on this value:
// fetchTransformed_pixelBounds() wraps coordinates for BlendTransformedTiled
// and clamps them for BlendTransformed.
2017enum TextureBlendType {
2018 BlendUntransformed,
2019 BlendTiled,
2020 BlendTransformed,
2021 BlendTransformedTiled,
2022 BlendTransformedBilinear,
2023 BlendTransformedBilinearTiled,
// Not a blend type: equals the number of enumerators above, so it must stay last.
2024 NBlendTypes
2025};
2026
2027static const uint *QT_FASTCALL fetchUntransformed(uint *buffer, const Operator *,
2028 const QSpanData *data, int y, int x, int length)
2029{
2030 const QPixelLayout *layout = &qPixelLayouts[data->texture.format];
2031 return layout->fetchToARGB32PM(buffer, data->texture.scanLine(y), x, length, data->texture.colorTable, nullptr);
2032}
2033
2034static const uint *QT_FASTCALL fetchUntransformedARGB32PM(uint *, const Operator *,
2035 const QSpanData *data, int y, int x, int)
2036{
2037 const uchar *scanLine = data->texture.scanLine(y);
2038 return reinterpret_cast<const uint *>(scanLine) + x;
2039}
2040
2041static const uint *QT_FASTCALL fetchUntransformedRGB16(uint *buffer, const Operator *,
2042 const QSpanData *data, int y, int x,
2043 int length)
2044{
2045 const quint16 *scanLine = (const quint16 *)data->texture.scanLine(y) + x;
2046 for (int i = 0; i < length; ++i)
2047 buffer[i] = qConvertRgb16To32(c: scanLine[i]);
2048 return buffer;
2049}
2050
2051#if QT_CONFIG(raster_64bit)
2052static const QRgba64 *QT_FASTCALL fetchUntransformed64(QRgba64 *buffer, const Operator *,
2053 const QSpanData *data, int y, int x, int length)
2054{
2055 const QPixelLayout *layout = &qPixelLayouts[data->texture.format];
2056 return layout->fetchToRGBA64PM(buffer, data->texture.scanLine(y), x, length, data->texture.colorTable, nullptr);
2057}
2058
2059static const QRgba64 *QT_FASTCALL fetchUntransformedRGBA64PM(QRgba64 *, const Operator *,
2060 const QSpanData *data, int y, int x, int)
2061{
2062 const uchar *scanLine = data->texture.scanLine(y);
2063 return reinterpret_cast<const QRgba64 *>(scanLine) + x;
2064}
2065#endif
2066
2067template<TextureBlendType blendType>
2068inline void fetchTransformed_pixelBounds(int max, int l1, int l2, int &v)
2069{
2070 Q_STATIC_ASSERT(blendType == BlendTransformed || blendType == BlendTransformedTiled);
2071 if (blendType == BlendTransformedTiled) {
2072 if (v < 0 || v >= max) {
2073 v %= max;
2074 if (v < 0) v += max;
2075 }
2076 } else {
2077 v = qBound(min: l1, val: v, max: l2);
2078 }
2079}
2080
2081static inline bool canUseFastMatrixPath(const qreal cx, const qreal cy, const qsizetype length, const QSpanData *data)
2082{
2083 if (Q_UNLIKELY(!data->fast_matrix))
2084 return false;
2085
2086 qreal fx = (data->m21 * cy + data->m11 * cx + data->dx) * fixed_scale;
2087 qreal fy = (data->m22 * cy + data->m12 * cx + data->dy) * fixed_scale;
2088 qreal minc = std::min(a: fx, b: fy);
2089 qreal maxc = std::max(a: fx, b: fy);
2090 fx += std::trunc(x: data->m11 * fixed_scale) * length;
2091 fy += std::trunc(x: data->m12 * fixed_scale) * length;
2092 minc = std::min(a: minc, b: std::min(a: fx, b: fy));
2093 maxc = std::max(a: maxc, b: std::max(a: fx, b: fy));
2094
2095 return minc >= std::numeric_limits<int>::min() && maxc <= std::numeric_limits<int>::max();
2096}
2097
// Fills 'buffer' with 'length' source pixels sampled from data->texture along
// the transformed scanline that starts at device pixel (x, y). One texel is
// picked per output pixel; no interpolation is done here.
//
// Template parameters:
//   blendType - BlendTransformed (coordinates clamped to the texture bounds)
//               or BlendTransformedTiled (coordinates wrapped); anything else
//               fails the static assert.
//   bpp       - source pixel depth. With BPPNone the per-pixel fetch function
//               is looked up at run time from the texture's layout.
//   T         - element type written to 'buffer'.
//
// Pixels are stored as read from the texture; conversion to the working
// format is done by the callers (see fetchTransformed / fetchTransformed64).
2098template<TextureBlendType blendType, QPixelLayout::BPP bpp, typename T>
2099static void QT_FASTCALL fetchTransformed_fetcher(T *buffer, const QSpanData *data,
2100 int y, int x, int length)
2101{
2102 Q_STATIC_ASSERT(blendType == BlendTransformed || blendType == BlendTransformedTiled);
2103 const QTextureData &image = data->texture;
2104
// Sample at the pixel center.
2105 const qreal cx = x + qreal(0.5);
2106 const qreal cy = y + qreal(0.5);
2107
// useFetch: source pixels are narrower than 32 bits and T is uint-sized, so
// each pixel is read through 'fetch'. Otherwise the scanline is indexed
// directly as an array of T.
2108 constexpr bool useFetch = (bpp < QPixelLayout::BPP32) && sizeof(T) == sizeof(uint);
2109 const QPixelLayout *layout = &qPixelLayouts[data->texture.format];
2110 if (!useFetch)
2111 Q_ASSERT(layout->bpp == bpp);
2112 // When templated 'fetch' should be inlined at compile time:
2113 const FetchPixelFunc fetch = (bpp == QPixelLayout::BPPNone) ? qFetchPixel[layout->bpp] : FetchPixelFunc(fetchPixel<bpp>);
2114
// Integer fixed-point path. Coordinates are scaled by fixed_scale and the
// texel index is taken with '>> 16' (presumably fixed_scale == 1 << 16; it is
// defined outside this chunk).
2115 if (canUseFastMatrixPath(cx, cy, length, data)) {
2116 // The increment pr x in the scanline
2117 int fdx = (int)(data->m11 * fixed_scale);
2118 int fdy = (int)(data->m12 * fixed_scale);
2119
2120 int fx = int((data->m21 * cy
2121 + data->m11 * cx + data->dx) * fixed_scale);
2122 int fy = int((data->m22 * cy
2123 + data->m12 * cx + data->dy) * fixed_scale);
2124
2125 if (fdy == 0) { // simple scale, no rotation or shear
// The source row is constant for the whole span: resolve it once.
2126 int py = (fy >> 16);
2127 fetchTransformed_pixelBounds<blendType>(image.height, image.y1, image.y2 - 1, py);
2128 const uchar *src = image.scanLine(y: py);
2129
2130 int i = 0;
2131 if (blendType == BlendTransformed) {
// fastLen: number of steps until fx reaches the last (fdx > 0) or first
// (fdx < 0) valid column, capped at length.
2132 int fastLen = length;
2133 if (fdx > 0)
2134 fastLen = qMin(a: fastLen, b: int((qint64(image.x2 - 1) * fixed_scale - fx) / fdx));
2135 else if (fdx < 0)
2136 fastLen = qMin(a: fastLen, b: int((qint64(image.x1) * fixed_scale - fx) / fdx));
2137
// Lead-in: clamp while the sample is still outside the valid columns; stop at
// the first sample that needs no clamping.
2138 for (; i < fastLen; ++i) {
2139 int x1 = (fx >> 16);
2140 int x2 = x1;
2141 fetchTransformed_pixelBounds<blendType>(image.width, image.x1, image.x2 - 1, x1);
2142 if (x1 == x2)
2143 break;
2144 if (useFetch)
2145 buffer[i] = fetch(src, x1);
2146 else
2147 buffer[i] = reinterpret_cast<const T*>(src)[x1];
2148 fx += fdx;
2149 }
2150
// Middle: samples up to fastLen are read without bounds checks.
2151 for (; i < fastLen; ++i) {
2152 int px = (fx >> 16);
2153 if (useFetch)
2154 buffer[i] = fetch(src, px);
2155 else
2156 buffer[i] = reinterpret_cast<const T*>(src)[px];
2157 fx += fdx;
2158 }
2159 }
2160
// Remainder (and the whole span when tiled): bounds-adjust every sample.
2161 for (; i < length; ++i) {
2162 int px = (fx >> 16);
2163 fetchTransformed_pixelBounds<blendType>(image.width, image.x1, image.x2 - 1, px);
2164 if (useFetch)
2165 buffer[i] = fetch(src, px);
2166 else
2167 buffer[i] = reinterpret_cast<const T*>(src)[px];
2168 fx += fdx;
2169 }
2170 } else { // rotation or shear
// Same three-phase scheme as above, but both coordinates advance per pixel,
// so the scanline is looked up for every sample.
2171 int i = 0;
2172 if (blendType == BlendTransformed) {
2173 int fastLen = length;
2174 if (fdx > 0)
2175 fastLen = qMin(a: fastLen, b: int((qint64(image.x2 - 1) * fixed_scale - fx) / fdx));
2176 else if (fdx < 0)
2177 fastLen = qMin(a: fastLen, b: int((qint64(image.x1) * fixed_scale - fx) / fdx));
2178 if (fdy > 0)
2179 fastLen = qMin(a: fastLen, b: int((qint64(image.y2 - 1) * fixed_scale - fy) / fdy));
2180 else if (fdy < 0)
2181 fastLen = qMin(a: fastLen, b: int((qint64(image.y1) * fixed_scale - fy) / fdy));
2182
2183 for (; i < fastLen; ++i) {
2184 int x1 = (fx >> 16);
2185 int y1 = (fy >> 16);
2186 int x2 = x1;
2187 int y2 = y1;
2188 fetchTransformed_pixelBounds<blendType>(image.width, image.x1, image.x2 - 1, x1);
2189 fetchTransformed_pixelBounds<blendType>(image.height, image.y1, image.y2 - 1, y1);
2190 if (x1 == x2 && y1 == y2)
2191 break;
2192 if (useFetch)
2193 buffer[i] = fetch(image.scanLine(y: y1), x1);
2194 else
2195 buffer[i] = reinterpret_cast<const T*>(image.scanLine(y: y1))[x1];
2196 fx += fdx;
2197 fy += fdy;
2198 }
2199
2200 for (; i < fastLen; ++i) {
2201 int px = (fx >> 16);
2202 int py = (fy >> 16);
2203 if (useFetch)
2204 buffer[i] = fetch(image.scanLine(y: py), px);
2205 else
2206 buffer[i] = reinterpret_cast<const T*>(image.scanLine(y: py))[px];
2207 fx += fdx;
2208 fy += fdy;
2209 }
2210 }
2211
2212 for (; i < length; ++i) {
2213 int px = (fx >> 16);
2214 int py = (fy >> 16);
2215 fetchTransformed_pixelBounds<blendType>(image.width, image.x1, image.x2 - 1, px);
2216 fetchTransformed_pixelBounds<blendType>(image.height, image.y1, image.y2 - 1, py);
2217 if (useFetch)
2218 buffer[i] = fetch(image.scanLine(y: py), px);
2219 else
2220 buffer[i] = reinterpret_cast<const T*>(image.scanLine(y: py))[px];
2221 fx += fdx;
2222 fy += fdy;
2223 }
2224 }
2225 } else {
// Floating-point path: coordinates carry a third component (fw) and are
// divided by it for every pixel.
2226 const qreal fdx = data->m11;
2227 const qreal fdy = data->m12;
2228 const qreal fdw = data->m13;
2229
2230 qreal fx = data->m21 * cy + data->m11 * cx + data->dx;
2231 qreal fy = data->m22 * cy + data->m12 * cx + data->dy;
2232 qreal fw = data->m23 * cy + data->m13 * cx + data->m33;
2233
2234 T *const end = buffer + length;
2235 T *b = buffer;
2236 while (b < end) {
// A zero fw is treated as 1 so the division below is always defined.
2237 const qreal iw = fw == 0 ? 1 : 1 / fw;
2238 const qreal tx = fx * iw;
2239 const qreal ty = fy * iw;
2240 int px = qFloor(v: tx);
2241 int py = qFloor(v: ty);
2242
2243 fetchTransformed_pixelBounds<blendType>(image.height, image.y1, image.y2 - 1, py);
2244 fetchTransformed_pixelBounds<blendType>(image.width, image.x1, image.x2 - 1, px);
2245 if (useFetch)
2246 *b = fetch(image.scanLine(y: py), px);
2247 else
2248 *b = reinterpret_cast<const T*>(image.scanLine(y: py))[px];
2249
2250 fx += fdx;
2251 fy += fdy;
2252 fw += fdw;
2253 //force increment to avoid /0
2254 if (!fw) {
2255 fw += fdw;
2256 }
2257 ++b;
2258 }
2259 }
2260}
2261
2262template<TextureBlendType blendType, QPixelLayout::BPP bpp>
2263static const uint *QT_FASTCALL fetchTransformed(uint *buffer, const Operator *, const QSpanData *data,
2264 int y, int x, int length)
2265{
2266 Q_STATIC_ASSERT(blendType == BlendTransformed || blendType == BlendTransformedTiled);
2267 const QPixelLayout *layout = &qPixelLayouts[data->texture.format];
2268 fetchTransformed_fetcher<blendType, bpp, uint>(buffer, data, y, x, length);
2269 layout->convertToARGB32PM(buffer, length, data->texture.colorTable);
2270 return buffer;
2271}
2272
2273#if QT_CONFIG(raster_64bit)
2274template<TextureBlendType blendType> /* either BlendTransformed or BlendTransformedTiled */
2275static const QRgba64 *QT_FASTCALL fetchTransformed64(QRgba64 *buffer, const Operator *, const QSpanData *data,
2276 int y, int x, int length)
2277{
2278 const QPixelLayout *layout = &qPixelLayouts[data->texture.format];
2279 if (layout->bpp != QPixelLayout::BPP64) {
2280 uint buffer32[BufferSize];
2281 Q_ASSERT(length <= BufferSize);
2282 if (layout->bpp == QPixelLayout::BPP32)
2283 fetchTransformed_fetcher<blendType, QPixelLayout::BPP32, uint>(buffer32, data, y, x, length);
2284 else
2285 fetchTransformed_fetcher<blendType, QPixelLayout::BPPNone, uint>(buffer32, data, y, x, length);
2286 return layout->convertToRGBA64PM(buffer, buffer32, length, data->texture.colorTable, nullptr);
2287 }
2288
2289 fetchTransformed_fetcher<blendType, QPixelLayout::BPP64, QRgba64>(buffer, data, y, x, length);
2290 if (data->texture.format == QImage::Format_RGBA64)
2291 convertRGBA64ToRGBA64PM(buffer, count: length);
2292 return buffer;
2293}
2294#endif
2295
/** \internal
  Bilinear blend of four 32-bit pixels (top-left, top-right, bottom-left,
  bottom-right) using 4-bit fractions.

  distx and disty must be between 0 and 16 (inclusive); the four resulting
  weights always add up to 256:
    tl: (16-distx) * (16-disty)    tr: distx * (16-disty)
    bl: (16-distx) * disty         br: distx * disty
  Two 8-bit channels are processed per multiplication by working on the
  0x00ff00ff and 0xff00ff00 halves of each pixel separately.
 */
static inline uint interpolate_4_pixels_16(uint tl, uint tr, uint bl, uint br, uint distx, uint disty)
{
    const uint evenBytes = 0x00ff00ff;
    const uint oddBytes = 0xff00ff00;

    // Per-corner weights, all derived from the single product distx * disty.
    const uint distxy = distx * disty;
    const uint weightTL = 16*16 - 16*distx - 16*disty + distxy;
    const uint weightTR = distx*16 - distxy;
    const uint weightBL = disty*16 - distxy;
    const uint weightBR = distxy;

    // Even bytes stay in place; each weighted sum fits in its 16-bit lane.
    const uint sumEven = (tl & evenBytes) * weightTL
                       + (tr & evenBytes) * weightTR
                       + (bl & evenBytes) * weightBL
                       + (br & evenBytes) * weightBR;

    // Odd bytes are shifted down first so their sums land back in the odd bytes.
    const uint sumOdd = ((tl & oddBytes) >> 8) * weightTL
                      + ((tr & oddBytes) >> 8) * weightTR
                      + ((bl & oddBytes) >> 8) * weightBL
                      + ((br & oddBytes) >> 8) * weightBR;

    return ((sumEven >> 8) & evenBytes) | (sumOdd & oddBytes);
}
2315
#if defined(__SSE2__)
/*
 * SSE2 statement-macro form of interpolate_4_pixels_16(): blends four pixels
 * at once and stores the four 32-bit results to `b` with an unaligned store.
 *
 *   tl, tr, bl, br  __m128i, four 32-bit pixels each (one corner of each of
 *                   the four output pixels).
 *   distx, disty    __m128i of 16-bit lanes holding the 4-bit-precision
 *                   fractions; the callers in this file replicate each value
 *                   into both 16-bit halves of its 32-bit pixel lane.
 *   colorMask       callers pass 0x00ff00ff per 32-bit lane (selects the low
 *                   byte of every 16-bit lane).
 *   v_256           callers pass 256 in every 16-bit lane.
 *   b               destination pointer; 16 bytes are written.
 *
 * The macro declares locals (dxdy, distx_, disty_, idxidy, dxidy, idxdy,
 * tlAG ... rRB) inside its own brace scope, so arguments must not be
 * expressions that use those names.
 */
#define interpolate_4_pixels_16_sse2(tl, tr, bl, br, distx, disty, colorMask, v_256, b) \
{ \
    /* Weights: dxdy = dx*dy, dxidy = 16*dx - dx*dy, idxdy = 16*dy - dx*dy, */ \
    /* idxidy = 256 - 16*dx - 16*dy + dx*dy (same formulas as the scalar code). */ \
    const __m128i dxdy = _mm_mullo_epi16 (distx, disty); \
    const __m128i distx_ = _mm_slli_epi16(distx, 4); \
    const __m128i disty_ = _mm_slli_epi16(disty, 4); \
    const __m128i idxidy = _mm_add_epi16(dxdy, _mm_sub_epi16(v_256, _mm_add_epi16(distx_, disty_))); \
    const __m128i dxidy = _mm_sub_epi16(distx_, dxdy); \
    const __m128i idxdy = _mm_sub_epi16(disty_, dxdy); \
 \
    /* Split every pixel into its high bytes (AG, shifted down) and low bytes (RB). */ \
    __m128i tlAG = _mm_srli_epi16(tl, 8); \
    __m128i tlRB = _mm_and_si128(tl, colorMask); \
    __m128i trAG = _mm_srli_epi16(tr, 8); \
    __m128i trRB = _mm_and_si128(tr, colorMask); \
    __m128i blAG = _mm_srli_epi16(bl, 8); \
    __m128i blRB = _mm_and_si128(bl, colorMask); \
    __m128i brAG = _mm_srli_epi16(br, 8); \
    __m128i brRB = _mm_and_si128(br, colorMask); \
 \
    /* Scale each corner by its weight (16-bit multiplies, low half kept). */ \
    tlAG = _mm_mullo_epi16(tlAG, idxidy); \
    tlRB = _mm_mullo_epi16(tlRB, idxidy); \
    trAG = _mm_mullo_epi16(trAG, dxidy); \
    trRB = _mm_mullo_epi16(trRB, dxidy); \
    blAG = _mm_mullo_epi16(blAG, idxdy); \
    blRB = _mm_mullo_epi16(blRB, idxdy); \
    brAG = _mm_mullo_epi16(brAG, dxdy); \
    brRB = _mm_mullo_epi16(brRB, dxdy); \
 \
    /* Add the values, and shift to only keep 8 significant bits per colors */ \
    __m128i rAG =_mm_add_epi16(_mm_add_epi16(tlAG, trAG), _mm_add_epi16(blAG, brAG)); \
    __m128i rRB =_mm_add_epi16(_mm_add_epi16(tlRB, trRB), _mm_add_epi16(blRB, brRB)); \
    /* AG keeps the high byte of each lane in place, RB moves its high byte down. */ \
    rAG = _mm_andnot_si128(colorMask, rAG); \
    rRB = _mm_srli_epi16(rRB, 8); \
    _mm_storeu_si128((__m128i*)(b), _mm_or_si128(rAG, rRB)); \
}
#endif
2352
#if defined(__ARM_NEON__)
/*
 * NEON statement-macro form of interpolate_4_pixels_16(): blends four pixels
 * at once and stores the four 32-bit results to `b`.
 *
 *   tl, tr, bl, br  int16x8_t views of four 32-bit pixels each.
 *   distx, disty    int16x8_t, 4-bit-precision fractions per 16-bit lane.
 *   disty_          disty already shifted left by 4; unlike the SSE2 macro
 *                   the caller supplies it (the callers in this file compute
 *                   it with vshlq_n_s16(..., 4)).
 *   colorMask       callers pass 0x00ff in every 16-bit lane.
 *   invColorMask    callers pass the bitwise inverse of colorMask.
 *   v_256           callers pass 256 in every 16-bit lane.
 *   b               destination pointer; 16 bytes are written.
 *
 * The macro declares locals (dxdy, distx_, idxidy, dxidy, idxdy, tlAG ... rRB)
 * inside its own brace scope, so arguments must not be expressions that use
 * those names.
 */
#define interpolate_4_pixels_16_neon(tl, tr, bl, br, distx, disty, disty_, colorMask, invColorMask, v_256, b) \
{ \
    /* Weights, same formulas as the scalar interpolate_4_pixels_16(). */ \
    const int16x8_t dxdy = vmulq_s16(distx, disty); \
    const int16x8_t distx_ = vshlq_n_s16(distx, 4); \
    const int16x8_t idxidy = vaddq_s16(dxdy, vsubq_s16(v_256, vaddq_s16(distx_, disty_))); \
    const int16x8_t dxidy = vsubq_s16(distx_, dxdy); \
    const int16x8_t idxdy = vsubq_s16(disty_, dxdy); \
 \
    /* Split every pixel into its high bytes (AG, shifted down) and low bytes (RB). */ \
    int16x8_t tlAG = vreinterpretq_s16_u16(vshrq_n_u16(vreinterpretq_u16_s16(tl), 8)); \
    int16x8_t tlRB = vandq_s16(tl, colorMask); \
    int16x8_t trAG = vreinterpretq_s16_u16(vshrq_n_u16(vreinterpretq_u16_s16(tr), 8)); \
    int16x8_t trRB = vandq_s16(tr, colorMask); \
    int16x8_t blAG = vreinterpretq_s16_u16(vshrq_n_u16(vreinterpretq_u16_s16(bl), 8)); \
    int16x8_t blRB = vandq_s16(bl, colorMask); \
    int16x8_t brAG = vreinterpretq_s16_u16(vshrq_n_u16(vreinterpretq_u16_s16(br), 8)); \
    int16x8_t brRB = vandq_s16(br, colorMask); \
 \
    /* Weighted sum of the four corners using multiply-accumulate. */ \
    int16x8_t rAG = vmulq_s16(tlAG, idxidy); \
    int16x8_t rRB = vmulq_s16(tlRB, idxidy); \
    rAG = vmlaq_s16(rAG, trAG, dxidy); \
    rRB = vmlaq_s16(rRB, trRB, dxidy); \
    rAG = vmlaq_s16(rAG, blAG, idxdy); \
    rRB = vmlaq_s16(rRB, blRB, idxdy); \
    rAG = vmlaq_s16(rAG, brAG, dxdy); \
    rRB = vmlaq_s16(rRB, brRB, dxdy); \
 \
    /* Keep the high byte of each lane: AG in place, RB moved down, then merge. */ \
    rAG = vandq_s16(invColorMask, rAG); \
    rRB = vreinterpretq_s16_u16(vshrq_n_u16(vreinterpretq_u16_s16(rRB), 8)); \
    vst1q_s16((int16_t*)(b), vorrq_s16(rAG, rRB)); \
}
#endif
2385
// Maps the integer sample coordinate `v1` to a valid pair of neighbouring
// source coordinates (v1, v2) for bilinear filtering along one axis.
// Only declared here; the behaviour is provided by the explicit
// specializations for BlendTransformedBilinearTiled (wraps, uses `max`) and
// BlendTransformedBilinear (clamps to [l1, l2]).
template<TextureBlendType blendType>
void fetchTransformedBilinear_pixelBounds(int max, int l1, int l2, int &v1, int &v2);
2388
2389template<>
2390inline void fetchTransformedBilinear_pixelBounds<BlendTransformedBilinearTiled>(int max, int, int, int &v1, int &v2)
2391{
2392 v1 %= max;
2393 if (v1 < 0)
2394 v1 += max;
2395 v2 = v1 + 1;
2396 if (v2 == max)
2397 v2 = 0;
2398 Q_ASSERT(v1 >= 0 && v1 < max);
2399 Q_ASSERT(v2 >= 0 && v2 < max);
2400}
2401
2402template<>
2403inline void fetchTransformedBilinear_pixelBounds<BlendTransformedBilinear>(int, int l1, int l2, int &v1, int &v2)
2404{
2405 if (v1 < l1)
2406 v2 = v1 = l1;
2407 else if (v1 >= l2)
2408 v2 = v1 = l2;
2409 else
2410 v2 = v1 + 1;
2411 Q_ASSERT(v1 >= l1 && v1 <= l2);
2412 Q_ASSERT(v2 >= l1 && v2 <= l2);
2413}
2414
// Identifies the specialised bilinear fast-path helpers. The values are used
// as the second index of bilinearFastTransformHelperARGB32PM, so their order
// must match the order of the helpers in that table.
enum FastTransformTypes {
    SimpleScaleTransform,   // ..._simple_scale_helper
    UpscaleTransform,       // ..._upscale_helper
    DownscaleTransform,     // ..._downscale_helper
    RotateTransform,        // ..._rotate_helper (8-bit fractional precision)
    FastRotateTransform,    // ..._fast_rotate_helper (4-bit precision when no fast 4-pixel interpolate is available)
    NFastTransformTypes     // number of entries above; used as an array dimension
};
2423
2424// Completes the partial interpolation stored in IntermediateBuffer.
2425// by performing the x-axis interpolation and joining the RB and AG buffers.
2426static void QT_FASTCALL intermediate_adder(uint *b, uint *end, const IntermediateBuffer &intermediate, int offset, int &fx, int fdx)
2427{
2428#if defined(QT_COMPILER_SUPPORTS_AVX2)
2429 extern void QT_FASTCALL intermediate_adder_avx2(uint *b, uint *end, const IntermediateBuffer &intermediate, int offset, int &fx, int fdx);
2430 if (qCpuHasFeature(ArchHaswell))
2431 return intermediate_adder_avx2(b, end, intermediate, offset, fx, fdx);
2432#endif
2433
2434 // Switch to intermediate buffer coordinates
2435 fx -= offset * fixed_scale;
2436
2437 while (b < end) {
2438 const int x = (fx >> 16);
2439
2440 const uint distx = (fx & 0x0000ffff) >> 8;
2441 const uint idistx = 256 - distx;
2442 const uint rb = (intermediate.buffer_rb[x] * idistx + intermediate.buffer_rb[x + 1] * distx) & 0xff00ff00;
2443 const uint ag = (intermediate.buffer_ag[x] * idistx + intermediate.buffer_ag[x + 1] * distx) & 0xff00ff00;
2444 *b = (rb >> 8) | ag;
2445 b++;
2446 fx += fdx;
2447 }
2448 fx += offset * fixed_scale;
2449}
2450
// Common signature of the bilinear ARGB32PM fast-path helpers below: fill the
// output pixels [b, end) by sampling `image`, starting at the 16.16 fixed-point
// source position (fx, fy) and stepping by (fdx, fdy) per output pixel.
// fx and fy are in/out: the helpers advance them as they consume pixels.
typedef void (QT_FASTCALL *BilinearFastTransformHelper)(uint *b, uint *end, const QTextureData &image, int &fx, int &fy, int fdx, int fdy);
2452
2453template<TextureBlendType blendType>
2454static void QT_FASTCALL fetchTransformedBilinearARGB32PM_simple_scale_helper(uint *b, uint *end, const QTextureData &image,
2455 int &fx, int &fy, int fdx, int /*fdy*/)
2456{
2457 int y1 = (fy >> 16);
2458 int y2;
2459 fetchTransformedBilinear_pixelBounds<blendType>(image.height, image.y1, image.y2 - 1, y1, y2);
2460 const uint *s1 = (const uint *)image.scanLine(y: y1);
2461 const uint *s2 = (const uint *)image.scanLine(y: y2);
2462
2463 const int disty = (fy & 0x0000ffff) >> 8;
2464 const int idisty = 256 - disty;
2465 const int length = end - b;
2466
2467 // The intermediate buffer is generated in the positive direction
2468 const int adjust = (fdx < 0) ? fdx * length : 0;
2469 const int offset = (fx + adjust) >> 16;
2470 int x = offset;
2471
2472 IntermediateBuffer intermediate;
2473 // count is the size used in the intermediate.buffer.
2474 int count = (qint64(length) * qAbs(t: fdx) + fixed_scale - 1) / fixed_scale + 2;
2475 // length is supposed to be <= BufferSize either because data->m11 < 1 or
2476 // data->m11 < 2, and any larger buffers split
2477 Q_ASSERT(count <= BufferSize + 2);
2478 int f = 0;
2479 int lim = count;
2480 if (blendType == BlendTransformedBilinearTiled) {
2481 x %= image.width;
2482 if (x < 0) x += image.width;
2483 } else {
2484 lim = qMin(a: count, b: image.x2 - x);
2485 if (x < image.x1) {
2486 Q_ASSERT(x < image.x2);
2487 uint t = s1[image.x1];
2488 uint b = s2[image.x1];
2489 quint32 rb = (((t & 0xff00ff) * idisty + (b & 0xff00ff) * disty) >> 8) & 0xff00ff;
2490 quint32 ag = ((((t>>8) & 0xff00ff) * idisty + ((b>>8) & 0xff00ff) * disty) >> 8) & 0xff00ff;
2491 do {
2492 intermediate.buffer_rb[f] = rb;
2493 intermediate.buffer_ag[f] = ag;
2494 f++;
2495 x++;
2496 } while (x < image.x1 && f < lim);
2497 }
2498 }
2499
2500 if (blendType != BlendTransformedBilinearTiled) {
2501#if defined(__SSE2__)
2502 const __m128i disty_ = _mm_set1_epi16(w: disty);
2503 const __m128i idisty_ = _mm_set1_epi16(w: idisty);
2504 const __m128i colorMask = _mm_set1_epi32(i: 0x00ff00ff);
2505
2506 lim -= 3;
2507 for (; f < lim; x += 4, f += 4) {
2508 // Load 4 pixels from s1, and split the alpha-green and red-blue component
2509 __m128i top = _mm_loadu_si128(p: (const __m128i*)((const uint *)(s1)+x));
2510 __m128i topAG = _mm_srli_epi16(a: top, count: 8);
2511 __m128i topRB = _mm_and_si128(a: top, b: colorMask);
2512 // Multiplies each color component by idisty
2513 topAG = _mm_mullo_epi16 (a: topAG, b: idisty_);
2514 topRB = _mm_mullo_epi16 (a: topRB, b: idisty_);
2515
2516 // Same for the s2 vector
2517 __m128i bottom = _mm_loadu_si128(p: (const __m128i*)((const uint *)(s2)+x));
2518 __m128i bottomAG = _mm_srli_epi16(a: bottom, count: 8);
2519 __m128i bottomRB = _mm_and_si128(a: bottom, b: colorMask);
2520 bottomAG = _mm_mullo_epi16 (a: bottomAG, b: disty_);
2521 bottomRB = _mm_mullo_epi16 (a: bottomRB, b: disty_);
2522
2523 // Add the values, and shift to only keep 8 significant bits per colors
2524 __m128i rAG =_mm_add_epi16(a: topAG, b: bottomAG);
2525 rAG = _mm_srli_epi16(a: rAG, count: 8);
2526 _mm_storeu_si128(p: (__m128i*)(&intermediate.buffer_ag[f]), b: rAG);
2527 __m128i rRB =_mm_add_epi16(a: topRB, b: bottomRB);
2528 rRB = _mm_srli_epi16(a: rRB, count: 8);
2529 _mm_storeu_si128(p: (__m128i*)(&intermediate.buffer_rb[f]), b: rRB);
2530 }
2531#elif defined(__ARM_NEON__)
2532 const int16x8_t disty_ = vdupq_n_s16(disty);
2533 const int16x8_t idisty_ = vdupq_n_s16(idisty);
2534 const int16x8_t colorMask = vdupq_n_s16(0x00ff);
2535
2536 lim -= 3;
2537 for (; f < lim; x += 4, f += 4) {
2538 // Load 4 pixels from s1, and split the alpha-green and red-blue component
2539 int16x8_t top = vld1q_s16((int16_t*)((const uint *)(s1)+x));
2540 int16x8_t topAG = vreinterpretq_s16_u16(vshrq_n_u16(vreinterpretq_u16_s16(top), 8));
2541 int16x8_t topRB = vandq_s16(top, colorMask);
2542 // Multiplies each color component by idisty
2543 topAG = vmulq_s16(topAG, idisty_);
2544 topRB = vmulq_s16(topRB, idisty_);
2545
2546 // Same for the s2 vector
2547 int16x8_t bottom = vld1q_s16((int16_t*)((const uint *)(s2)+x));
2548 int16x8_t bottomAG = vreinterpretq_s16_u16(vshrq_n_u16(vreinterpretq_u16_s16(bottom), 8));
2549 int16x8_t bottomRB = vandq_s16(bottom, colorMask);
2550 bottomAG = vmulq_s16(bottomAG, disty_);
2551 bottomRB = vmulq_s16(bottomRB, disty_);
2552
2553 // Add the values, and shift to only keep 8 significant bits per colors
2554 int16x8_t rAG = vaddq_s16(topAG, bottomAG);
2555 rAG = vreinterpretq_s16_u16(vshrq_n_u16(vreinterpretq_u16_s16(rAG), 8));
2556 vst1q_s16((int16_t*)(&intermediate.buffer_ag[f]), rAG);
2557 int16x8_t rRB = vaddq_s16(topRB, bottomRB);
2558 rRB = vreinterpretq_s16_u16(vshrq_n_u16(vreinterpretq_u16_s16(rRB), 8));
2559 vst1q_s16((int16_t*)(&intermediate.buffer_rb[f]), rRB);
2560 }
2561#endif
2562 }
2563 for (; f < count; f++) { // Same as above but without simd
2564 if (blendType == BlendTransformedBilinearTiled) {
2565 if (x >= image.width) x -= image.width;
2566 } else {
2567 x = qMin(a: x, b: image.x2 - 1);
2568 }
2569
2570 uint t = s1[x];
2571 uint b = s2[x];
2572
2573 intermediate.buffer_rb[f] = (((t & 0xff00ff) * idisty + (b & 0xff00ff) * disty) >> 8) & 0xff00ff;
2574 intermediate.buffer_ag[f] = ((((t>>8) & 0xff00ff) * idisty + ((b>>8) & 0xff00ff) * disty) >> 8) & 0xff00ff;
2575 x++;
2576 }
2577
2578 // Now interpolate the values from the intermediate.buffer to get the final result.
2579 intermediate_adder(b, end, intermediate, offset, fx, fdx);
2580}
2581
2582template<TextureBlendType blendType>
2583static void QT_FASTCALL fetchTransformedBilinearARGB32PM_upscale_helper(uint *b, uint *end, const QTextureData &image,
2584 int &fx, int &fy, int fdx, int /*fdy*/)
2585{
2586 int y1 = (fy >> 16);
2587 int y2;
2588 fetchTransformedBilinear_pixelBounds<blendType>(image.height, image.y1, image.y2 - 1, y1, y2);
2589 const uint *s1 = (const uint *)image.scanLine(y: y1);
2590 const uint *s2 = (const uint *)image.scanLine(y: y2);
2591 const int disty = (fy & 0x0000ffff) >> 8;
2592
2593 if (blendType != BlendTransformedBilinearTiled) {
2594 const qint64 min_fx = qint64(image.x1) * fixed_scale;
2595 const qint64 max_fx = qint64(image.x2 - 1) * fixed_scale;
2596 while (b < end) {
2597 int x1 = (fx >> 16);
2598 int x2;
2599 fetchTransformedBilinear_pixelBounds<blendType>(image.width, image.x1, image.x2 - 1, x1, x2);
2600 if (x1 != x2)
2601 break;
2602 uint top = s1[x1];
2603 uint bot = s2[x1];
2604 *b = INTERPOLATE_PIXEL_256(x: top, a: 256 - disty, y: bot, b: disty);
2605 fx += fdx;
2606 ++b;
2607 }
2608 uint *boundedEnd = end;
2609 if (fdx > 0)
2610 boundedEnd = qMin(a: boundedEnd, b: b + (max_fx - fx) / fdx);
2611 else if (fdx < 0)
2612 boundedEnd = qMin(a: boundedEnd, b: b + (min_fx - fx) / fdx);
2613
2614 // A fast middle part without boundary checks
2615 while (b < boundedEnd) {
2616 int x = (fx >> 16);
2617 int distx = (fx & 0x0000ffff) >> 8;
2618 *b = interpolate_4_pixels(t: s1 + x, b: s2 + x, distx, disty);
2619 fx += fdx;
2620 ++b;
2621 }
2622 }
2623
2624 while (b < end) {
2625 int x1 = (fx >> 16);
2626 int x2;
2627 fetchTransformedBilinear_pixelBounds<blendType>(image.width, image.x1, image.x2 - 1 , x1, x2);
2628 uint tl = s1[x1];
2629 uint tr = s1[x2];
2630 uint bl = s2[x1];
2631 uint br = s2[x2];
2632 int distx = (fx & 0x0000ffff) >> 8;
2633 *b = interpolate_4_pixels(tl, tr, bl, br, distx, disty);
2634
2635 fx += fdx;
2636 ++b;
2637 }
2638}
2639
2640template<TextureBlendType blendType>
2641static void QT_FASTCALL fetchTransformedBilinearARGB32PM_downscale_helper(uint *b, uint *end, const QTextureData &image,
2642 int &fx, int &fy, int fdx, int /*fdy*/)
2643{
2644 int y1 = (fy >> 16);
2645 int y2;
2646 fetchTransformedBilinear_pixelBounds<blendType>(image.height, image.y1, image.y2 - 1, y1, y2);
2647 const uint *s1 = (const uint *)image.scanLine(y: y1);
2648 const uint *s2 = (const uint *)image.scanLine(y: y2);
2649 const int disty8 = (fy & 0x0000ffff) >> 8;
2650 const int disty4 = (disty8 + 0x08) >> 4;
2651
2652 if (blendType != BlendTransformedBilinearTiled) {
2653 const qint64 min_fx = qint64(image.x1) * fixed_scale;
2654 const qint64 max_fx = qint64(image.x2 - 1) * fixed_scale;
2655 while (b < end) {
2656 int x1 = (fx >> 16);
2657 int x2;
2658 fetchTransformedBilinear_pixelBounds<blendType>(image.width, image.x1, image.x2 - 1, x1, x2);
2659 if (x1 != x2)
2660 break;
2661 uint top = s1[x1];
2662 uint bot = s2[x1];
2663 *b = INTERPOLATE_PIXEL_256(x: top, a: 256 - disty8, y: bot, b: disty8);
2664 fx += fdx;
2665 ++b;
2666 }
2667 uint *boundedEnd = end;
2668 if (fdx > 0)
2669 boundedEnd = qMin(a: boundedEnd, b: b + (max_fx - fx) / fdx);
2670 else if (fdx < 0)
2671 boundedEnd = qMin(a: boundedEnd, b: b + (min_fx - fx) / fdx);
2672 // A fast middle part without boundary checks
2673#if defined(__SSE2__)
2674 const __m128i colorMask = _mm_set1_epi32(i: 0x00ff00ff);
2675 const __m128i v_256 = _mm_set1_epi16(w: 256);
2676 const __m128i v_disty = _mm_set1_epi16(w: disty4);
2677 const __m128i v_fdx = _mm_set1_epi32(i: fdx*4);
2678 const __m128i v_fx_r = _mm_set1_epi32(i: 0x8);
2679 __m128i v_fx = _mm_setr_epi32(i0: fx, i1: fx + fdx, i2: fx + fdx + fdx, i3: fx + fdx + fdx + fdx);
2680
2681 while (b < boundedEnd - 3) {
2682 __m128i offset = _mm_srli_epi32(a: v_fx, count: 16);
2683 const int offset0 = _mm_cvtsi128_si32(a: offset); offset = _mm_srli_si128(offset, 4);
2684 const int offset1 = _mm_cvtsi128_si32(a: offset); offset = _mm_srli_si128(offset, 4);
2685 const int offset2 = _mm_cvtsi128_si32(a: offset); offset = _mm_srli_si128(offset, 4);
2686 const int offset3 = _mm_cvtsi128_si32(a: offset);
2687 const __m128i tl = _mm_setr_epi32(i0: s1[offset0], i1: s1[offset1], i2: s1[offset2], i3: s1[offset3]);
2688 const __m128i tr = _mm_setr_epi32(i0: s1[offset0 + 1], i1: s1[offset1 + 1], i2: s1[offset2 + 1], i3: s1[offset3 + 1]);
2689 const __m128i bl = _mm_setr_epi32(i0: s2[offset0], i1: s2[offset1], i2: s2[offset2], i3: s2[offset3]);
2690 const __m128i br = _mm_setr_epi32(i0: s2[offset0 + 1], i1: s2[offset1 + 1], i2: s2[offset2 + 1], i3: s2[offset3 + 1]);
2691
2692 __m128i v_distx = _mm_srli_epi16(a: v_fx, count: 8);
2693 v_distx = _mm_srli_epi16(a: _mm_add_epi32(a: v_distx, b: v_fx_r), count: 4);
2694 v_distx = _mm_shufflehi_epi16(v_distx, _MM_SHUFFLE(2,2,0,0));
2695 v_distx = _mm_shufflelo_epi16(v_distx, _MM_SHUFFLE(2,2,0,0));
2696
2697 interpolate_4_pixels_16_sse2(tl, tr, bl, br, v_distx, v_disty, colorMask, v_256, b);
2698 b += 4;
2699 v_fx = _mm_add_epi32(a: v_fx, b: v_fdx);
2700 }
2701 fx = _mm_cvtsi128_si32(a: v_fx);
2702#elif defined(__ARM_NEON__)
2703 const int16x8_t colorMask = vdupq_n_s16(0x00ff);
2704 const int16x8_t invColorMask = vmvnq_s16(colorMask);
2705 const int16x8_t v_256 = vdupq_n_s16(256);
2706 const int16x8_t v_disty = vdupq_n_s16(disty4);
2707 const int16x8_t v_disty_ = vshlq_n_s16(v_disty, 4);
2708 int32x4_t v_fdx = vdupq_n_s32(fdx*4);
2709
2710 int32x4_t v_fx = vmovq_n_s32(fx);
2711 v_fx = vsetq_lane_s32(fx + fdx, v_fx, 1);
2712 v_fx = vsetq_lane_s32(fx + fdx * 2, v_fx, 2);
2713 v_fx = vsetq_lane_s32(fx + fdx * 3, v_fx, 3);
2714
2715 const int32x4_t v_ffff_mask = vdupq_n_s32(0x0000ffff);
2716 const int32x4_t v_fx_r = vdupq_n_s32(0x0800);
2717
2718 while (b < boundedEnd - 3) {
2719 uint32x4x2_t v_top, v_bot;
2720
2721 int x1 = (fx >> 16);
2722 fx += fdx;
2723 v_top = vld2q_lane_u32(s1 + x1, v_top, 0);
2724 v_bot = vld2q_lane_u32(s2 + x1, v_bot, 0);
2725 x1 = (fx >> 16);
2726 fx += fdx;
2727 v_top = vld2q_lane_u32(s1 + x1, v_top, 1);
2728 v_bot = vld2q_lane_u32(s2 + x1, v_bot, 1);
2729 x1 = (fx >> 16);
2730 fx += fdx;
2731 v_top = vld2q_lane_u32(s1 + x1, v_top, 2);
2732 v_bot = vld2q_lane_u32(s2 + x1, v_bot, 2);
2733 x1 = (fx >> 16);
2734 fx += fdx;
2735 v_top = vld2q_lane_u32(s1 + x1, v_top, 3);
2736 v_bot = vld2q_lane_u32(s2 + x1, v_bot, 3);
2737
2738 int32x4_t v_distx = vshrq_n_s32(vaddq_s32(vandq_s32(v_fx, v_ffff_mask), v_fx_r), 12);
2739 v_distx = vorrq_s32(v_distx, vshlq_n_s32(v_distx, 16));
2740
2741 interpolate_4_pixels_16_neon(
2742 vreinterpretq_s16_u32(v_top.val[0]), vreinterpretq_s16_u32(v_top.val[1]),
2743 vreinterpretq_s16_u32(v_bot.val[0]), vreinterpretq_s16_u32(v_bot.val[1]),
2744 vreinterpretq_s16_s32(v_distx), v_disty, v_disty_,
2745 colorMask, invColorMask, v_256, b);
2746 b+=4;
2747 v_fx = vaddq_s32(v_fx, v_fdx);
2748 }
2749#endif
2750 while (b < boundedEnd) {
2751 int x = (fx >> 16);
2752 if (hasFastInterpolate4()) {
2753 int distx8 = (fx & 0x0000ffff) >> 8;
2754 *b = interpolate_4_pixels(t: s1 + x, b: s2 + x, distx: distx8, disty: disty8);
2755 } else {
2756 int distx4 = ((fx & 0x0000ffff) + 0x0800) >> 12;
2757 *b = interpolate_4_pixels_16(tl: s1[x], tr: s1[x + 1], bl: s2[x], br: s2[x + 1], distx: distx4, disty: disty4);
2758 }
2759 fx += fdx;
2760 ++b;
2761 }
2762 }
2763
2764 while (b < end) {
2765 int x1 = (fx >> 16);
2766 int x2;
2767 fetchTransformedBilinear_pixelBounds<blendType>(image.width, image.x1, image.x2 - 1, x1, x2);
2768 uint tl = s1[x1];
2769 uint tr = s1[x2];
2770 uint bl = s2[x1];
2771 uint br = s2[x2];
2772 if (hasFastInterpolate4()) {
2773 int distx8 = (fx & 0x0000ffff) >> 8;
2774 *b = interpolate_4_pixels(tl, tr, bl, br, distx: distx8, disty: disty8);
2775 } else {
2776 int distx4 = ((fx & 0x0000ffff) + 0x0800) >> 12;
2777 *b = interpolate_4_pixels_16(tl, tr, bl, br, distx: distx4, disty: disty4);
2778 }
2779 fx += fdx;
2780 ++b;
2781 }
2782}
2783
2784template<TextureBlendType blendType>
2785static void QT_FASTCALL fetchTransformedBilinearARGB32PM_rotate_helper(uint *b, uint *end, const QTextureData &image,
2786 int &fx, int &fy, int fdx, int fdy)
2787{
2788 // if we are zooming more than 8 times, we use 8bit precision for the position.
2789 while (b < end) {
2790 int x1 = (fx >> 16);
2791 int x2;
2792 int y1 = (fy >> 16);
2793 int y2;
2794
2795 fetchTransformedBilinear_pixelBounds<blendType>(image.width, image.x1, image.x2 - 1, x1, x2);
2796 fetchTransformedBilinear_pixelBounds<blendType>(image.height, image.y1, image.y2 - 1, y1, y2);
2797
2798 const uint *s1 = (const uint *)image.scanLine(y: y1);
2799 const uint *s2 = (const uint *)image.scanLine(y: y2);
2800
2801 uint tl = s1[x1];
2802 uint tr = s1[x2];
2803 uint bl = s2[x1];
2804 uint br = s2[x2];
2805
2806 int distx = (fx & 0x0000ffff) >> 8;
2807 int disty = (fy & 0x0000ffff) >> 8;
2808
2809 *b = interpolate_4_pixels(tl, tr, bl, br, distx, disty);
2810
2811 fx += fdx;
2812 fy += fdy;
2813 ++b;
2814 }
2815}
2816
2817template<TextureBlendType blendType>
2818static void QT_FASTCALL fetchTransformedBilinearARGB32PM_fast_rotate_helper(uint *b, uint *end, const QTextureData &image,
2819 int &fx, int &fy, int fdx, int fdy)
2820{
2821 //we are zooming less than 8x, use 4bit precision
2822 if (blendType != BlendTransformedBilinearTiled) {
2823 const qint64 min_fx = qint64(image.x1) * fixed_scale;
2824 const qint64 max_fx = qint64(image.x2 - 1) * fixed_scale;
2825 const qint64 min_fy = qint64(image.y1) * fixed_scale;
2826 const qint64 max_fy = qint64(image.y2 - 1) * fixed_scale;
2827 // first handle the possibly bounded part in the beginning
2828 while (b < end) {
2829 int x1 = (fx >> 16);
2830 int x2;
2831 int y1 = (fy >> 16);
2832 int y2;
2833 fetchTransformedBilinear_pixelBounds<blendType>(image.width, image.x1, image.x2 - 1, x1, x2);
2834 fetchTransformedBilinear_pixelBounds<blendType>(image.height, image.y1, image.y2 - 1, y1, y2);
2835 if (x1 != x2 && y1 != y2)
2836 break;
2837 const uint *s1 = (const uint *)image.scanLine(y: y1);
2838 const uint *s2 = (const uint *)image.scanLine(y: y2);
2839 uint tl = s1[x1];
2840 uint tr = s1[x2];
2841 uint bl = s2[x1];
2842 uint br = s2[x2];
2843 if (hasFastInterpolate4()) {
2844 int distx = (fx & 0x0000ffff) >> 8;
2845 int disty = (fy & 0x0000ffff) >> 8;
2846 *b = interpolate_4_pixels(tl, tr, bl, br, distx, disty);
2847 } else {
2848 int distx = ((fx & 0x0000ffff) + 0x0800) >> 12;
2849 int disty = ((fy & 0x0000ffff) + 0x0800) >> 12;
2850 *b = interpolate_4_pixels_16(tl, tr, bl, br, distx, disty);
2851 }
2852 fx += fdx;
2853 fy += fdy;
2854 ++b;
2855 }
2856 uint *boundedEnd = end;
2857 if (fdx > 0)
2858 boundedEnd = qMin(a: boundedEnd, b: b + (max_fx - fx) / fdx);
2859 else if (fdx < 0)
2860 boundedEnd = qMin(a: boundedEnd, b: b + (min_fx - fx) / fdx);
2861 if (fdy > 0)
2862 boundedEnd = qMin(a: boundedEnd, b: b + (max_fy - fy) / fdy);
2863 else if (fdy < 0)
2864 boundedEnd = qMin(a: boundedEnd, b: b + (min_fy - fy) / fdy);
2865
2866 // until boundedEnd we can now have a fast middle part without boundary checks
2867#if defined(__SSE2__)
2868 const __m128i colorMask = _mm_set1_epi32(i: 0x00ff00ff);
2869 const __m128i v_256 = _mm_set1_epi16(w: 256);
2870 const __m128i v_fdx = _mm_set1_epi32(i: fdx*4);
2871 const __m128i v_fdy = _mm_set1_epi32(i: fdy*4);
2872 const __m128i v_fxy_r = _mm_set1_epi32(i: 0x8);
2873 __m128i v_fx = _mm_setr_epi32(i0: fx, i1: fx + fdx, i2: fx + fdx + fdx, i3: fx + fdx + fdx + fdx);
2874 __m128i v_fy = _mm_setr_epi32(i0: fy, i1: fy + fdy, i2: fy + fdy + fdy, i3: fy + fdy + fdy + fdy);
2875
2876 const uchar *textureData = image.imageData;
2877 const qsizetype bytesPerLine = image.bytesPerLine;
2878 const __m128i vbpl = _mm_shufflelo_epi16(_mm_cvtsi32_si128(bytesPerLine/4), _MM_SHUFFLE(0, 0, 0, 0));
2879
2880 while (b < boundedEnd - 3) {
2881 const __m128i vy = _mm_packs_epi32(a: _mm_srli_epi32(a: v_fy, count: 16), b: _mm_setzero_si128());
2882 // 4x16bit * 4x16bit -> 4x32bit
2883 __m128i offset = _mm_unpacklo_epi16(a: _mm_mullo_epi16(a: vy, b: vbpl), b: _mm_mulhi_epi16(a: vy, b: vbpl));
2884 offset = _mm_add_epi32(a: offset, b: _mm_srli_epi32(a: v_fx, count: 16));
2885 const int offset0 = _mm_cvtsi128_si32(a: offset); offset = _mm_srli_si128(offset, 4);
2886 const int offset1 = _mm_cvtsi128_si32(a: offset); offset = _mm_srli_si128(offset, 4);
2887 const int offset2 = _mm_cvtsi128_si32(a: offset); offset = _mm_srli_si128(offset, 4);
2888 const int offset3 = _mm_cvtsi128_si32(a: offset);
2889 const uint *topData = (const uint *)(textureData);
2890 const __m128i tl = _mm_setr_epi32(i0: topData[offset0], i1: topData[offset1], i2: topData[offset2], i3: topData[offset3]);
2891 const __m128i tr = _mm_setr_epi32(i0: topData[offset0 + 1], i1: topData[offset1 + 1], i2: topData[offset2 + 1], i3: topData[offset3 + 1]);
2892 const uint *bottomData = (const uint *)(textureData + bytesPerLine);
2893 const __m128i bl = _mm_setr_epi32(i0: bottomData[offset0], i1: bottomData[offset1], i2: bottomData[offset2], i3: bottomData[offset3]);
2894 const __m128i br = _mm_setr_epi32(i0: bottomData[offset0 + 1], i1: bottomData[offset1 + 1], i2: bottomData[offset2 + 1], i3: bottomData[offset3 + 1]);
2895
2896 __m128i v_distx = _mm_srli_epi16(a: v_fx, count: 8);
2897 __m128i v_disty = _mm_srli_epi16(a: v_fy, count: 8);
2898 v_distx = _mm_srli_epi16(a: _mm_add_epi32(a: v_distx, b: v_fxy_r), count: 4);
2899 v_disty = _mm_srli_epi16(a: _mm_add_epi32(a: v_disty, b: v_fxy_r), count: 4);
2900 v_distx = _mm_shufflehi_epi16(v_distx, _MM_SHUFFLE(2,2,0,0));
2901 v_distx = _mm_shufflelo_epi16(v_distx, _MM_SHUFFLE(2,2,0,0));
2902 v_disty = _mm_shufflehi_epi16(v_disty, _MM_SHUFFLE(2,2,0,0));
2903 v_disty = _mm_shufflelo_epi16(v_disty, _MM_SHUFFLE(2,2,0,0));
2904
2905 interpolate_4_pixels_16_sse2(tl, tr, bl, br, v_distx, v_disty, colorMask, v_256, b);
2906 b += 4;
2907 v_fx = _mm_add_epi32(a: v_fx, b: v_fdx);
2908 v_fy = _mm_add_epi32(a: v_fy, b: v_fdy);
2909 }
2910 fx = _mm_cvtsi128_si32(a: v_fx);
2911 fy = _mm_cvtsi128_si32(a: v_fy);
2912#elif defined(__ARM_NEON__)
2913 const int16x8_t colorMask = vdupq_n_s16(0x00ff);
2914 const int16x8_t invColorMask = vmvnq_s16(colorMask);
2915 const int16x8_t v_256 = vdupq_n_s16(256);
2916 int32x4_t v_fdx = vdupq_n_s32(fdx * 4);
2917 int32x4_t v_fdy = vdupq_n_s32(fdy * 4);
2918
2919 const uchar *textureData = image.imageData;
2920 const int bytesPerLine = image.bytesPerLine;
2921
2922 int32x4_t v_fx = vmovq_n_s32(fx);
2923 int32x4_t v_fy = vmovq_n_s32(fy);
2924 v_fx = vsetq_lane_s32(fx + fdx, v_fx, 1);
2925 v_fy = vsetq_lane_s32(fy + fdy, v_fy, 1);
2926 v_fx = vsetq_lane_s32(fx + fdx * 2, v_fx, 2);
2927 v_fy = vsetq_lane_s32(fy + fdy * 2, v_fy, 2);
2928 v_fx = vsetq_lane_s32(fx + fdx * 3, v_fx, 3);
2929 v_fy = vsetq_lane_s32(fy + fdy * 3, v_fy, 3);
2930
2931 const int32x4_t v_ffff_mask = vdupq_n_s32(0x0000ffff);
2932 const int32x4_t v_round = vdupq_n_s32(0x0800);
2933
2934 while (b < boundedEnd - 3) {
2935 uint32x4x2_t v_top, v_bot;
2936
2937 int x1 = (fx >> 16);
2938 int y1 = (fy >> 16);
2939 fx += fdx; fy += fdy;
2940 const uchar *sl = textureData + bytesPerLine * y1;
2941 const uint *s1 = reinterpret_cast<const uint *>(sl);
2942 const uint *s2 = reinterpret_cast<const uint *>(sl + bytesPerLine);
2943 v_top = vld2q_lane_u32(s1 + x1, v_top, 0);
2944 v_bot = vld2q_lane_u32(s2 + x1, v_bot, 0);
2945 x1 = (fx >> 16);
2946 y1 = (fy >> 16);
2947 fx += fdx; fy += fdy;
2948 sl = textureData + bytesPerLine * y1;
2949 s1 = reinterpret_cast<const uint *>(sl);
2950 s2 = reinterpret_cast<const uint *>(sl + bytesPerLine);
2951 v_top = vld2q_lane_u32(s1 + x1, v_top, 1);
2952 v_bot = vld2q_lane_u32(s2 + x1, v_bot, 1);
2953 x1 = (fx >> 16);
2954 y1 = (fy >> 16);
2955 fx += fdx; fy += fdy;
2956 sl = textureData + bytesPerLine * y1;
2957 s1 = reinterpret_cast<const uint *>(sl);
2958 s2 = reinterpret_cast<const uint *>(sl + bytesPerLine);
2959 v_top = vld2q_lane_u32(s1 + x1, v_top, 2);
2960 v_bot = vld2q_lane_u32(s2 + x1, v_bot, 2);
2961 x1 = (fx >> 16);
2962 y1 = (fy >> 16);
2963 fx += fdx; fy += fdy;
2964 sl = textureData + bytesPerLine * y1;
2965 s1 = reinterpret_cast<const uint *>(sl);
2966 s2 = reinterpret_cast<const uint *>(sl + bytesPerLine);
2967 v_top = vld2q_lane_u32(s1 + x1, v_top, 3);
2968 v_bot = vld2q_lane_u32(s2 + x1, v_bot, 3);
2969
2970 int32x4_t v_distx = vshrq_n_s32(vaddq_s32(vandq_s32(v_fx, v_ffff_mask), v_round), 12);
2971 int32x4_t v_disty = vshrq_n_s32(vaddq_s32(vandq_s32(v_fy, v_ffff_mask), v_round), 12);
2972 v_distx = vorrq_s32(v_distx, vshlq_n_s32(v_distx, 16));
2973 v_disty = vorrq_s32(v_disty, vshlq_n_s32(v_disty, 16));
2974 int16x8_t v_disty_ = vshlq_n_s16(vreinterpretq_s16_s32(v_disty), 4);
2975
2976 interpolate_4_pixels_16_neon(
2977 vreinterpretq_s16_u32(v_top.val[0]), vreinterpretq_s16_u32(v_top.val[1]),
2978 vreinterpretq_s16_u32(v_bot.val[0]), vreinterpretq_s16_u32(v_bot.val[1]),
2979 vreinterpretq_s16_s32(v_distx), vreinterpretq_s16_s32(v_disty),
2980 v_disty_, colorMask, invColorMask, v_256, b);
2981 b += 4;
2982 v_fx = vaddq_s32(v_fx, v_fdx);
2983 v_fy = vaddq_s32(v_fy, v_fdy);
2984 }
2985#endif
2986 while (b < boundedEnd) {
2987 int x = (fx >> 16);
2988 int y = (fy >> 16);
2989
2990 const uint *s1 = (const uint *)image.scanLine(y);
2991 const uint *s2 = (const uint *)image.scanLine(y: y + 1);
2992
2993 if (hasFastInterpolate4()) {
2994 int distx = (fx & 0x0000ffff) >> 8;
2995 int disty = (fy & 0x0000ffff) >> 8;
2996 *b = interpolate_4_pixels(t: s1 + x, b: s2 + x, distx, disty);
2997 } else {
2998 int distx = ((fx & 0x0000ffff) + 0x0800) >> 12;
2999 int disty = ((fy & 0x0000ffff) + 0x0800) >> 12;
3000 *b = interpolate_4_pixels_16(tl: s1[x], tr: s1[x + 1], bl: s2[x], br: s2[x + 1], distx, disty);
3001 }
3002
3003 fx += fdx;
3004 fy += fdy;
3005 ++b;
3006 }
3007 }
3008
3009 while (b < end) {
3010 int x1 = (fx >> 16);
3011 int x2;
3012 int y1 = (fy >> 16);
3013 int y2;
3014
3015 fetchTransformedBilinear_pixelBounds<blendType>(image.width, image.x1, image.x2 - 1, x1, x2);
3016 fetchTransformedBilinear_pixelBounds<blendType>(image.height, image.y1, image.y2 - 1, y1, y2);
3017
3018 const uint *s1 = (const uint *)image.scanLine(y: y1);
3019 const uint *s2 = (const uint *)image.scanLine(y: y2);
3020
3021 uint tl = s1[x1];
3022 uint tr = s1[x2];
3023 uint bl = s2[x1];
3024 uint br = s2[x2];
3025
3026 if (hasFastInterpolate4()) {
3027 int distx = (fx & 0x0000ffff) >> 8;
3028 int disty = (fy & 0x0000ffff) >> 8;
3029 *b = interpolate_4_pixels(tl, tr, bl, br, distx, disty);
3030 } else {
3031 int distx = ((fx & 0x0000ffff) + 0x0800) >> 12;
3032 int disty = ((fy & 0x0000ffff) + 0x0800) >> 12;
3033 *b = interpolate_4_pixels_16(tl, tr, bl, br, distx, disty);
3034 }
3035
3036 fx += fdx;
3037 fy += fdy;
3038 ++b;
3039 }
3040}
3041
3042
3043static BilinearFastTransformHelper bilinearFastTransformHelperARGB32PM[2][NFastTransformTypes] = {
3044 {
3045 fetchTransformedBilinearARGB32PM_simple_scale_helper<BlendTransformedBilinear>,
3046 fetchTransformedBilinearARGB32PM_upscale_helper<BlendTransformedBilinear>,
3047 fetchTransformedBilinearARGB32PM_downscale_helper<BlendTransformedBilinear>,
3048 fetchTransformedBilinearARGB32PM_rotate_helper<BlendTransformedBilinear>,
3049 fetchTransformedBilinearARGB32PM_fast_rotate_helper<BlendTransformedBilinear>
3050 },
3051 {
3052 fetchTransformedBilinearARGB32PM_simple_scale_helper<BlendTransformedBilinearTiled>,
3053 fetchTransformedBilinearARGB32PM_upscale_helper<BlendTransformedBilinearTiled>,
3054 fetchTransformedBilinearARGB32PM_downscale_helper<BlendTransformedBilinearTiled>,
3055 fetchTransformedBilinearARGB32PM_rotate_helper<BlendTransformedBilinearTiled>,
3056 fetchTransformedBilinearARGB32PM_fast_rotate_helper<BlendTransformedBilinearTiled>
3057 }
3058};
3059
3060template<TextureBlendType blendType> /* blendType = BlendTransformedBilinear or BlendTransformedBilinearTiled */
3061static const uint * QT_FASTCALL fetchTransformedBilinearARGB32PM(uint *buffer, const Operator *,
3062 const QSpanData *data, int y, int x,
3063 int length)
3064{
3065 const qreal cx = x + qreal(0.5);
3066 const qreal cy = y + qreal(0.5);
3067 Q_CONSTEXPR int tiled = (blendType == BlendTransformedBilinearTiled) ? 1 : 0;
3068
3069 uint *end = buffer + length;
3070 uint *b = buffer;
3071 if (canUseFastMatrixPath(cx, cy, length, data)) {
3072 // The increment pr x in the scanline
3073 int fdx = (int)(data->m11 * fixed_scale);
3074 int fdy = (int)(data->m12 * fixed_scale);
3075
3076 int fx = int((data->m21 * cy
3077 + data->m11 * cx + data->dx) * fixed_scale);
3078 int fy = int((data->m22 * cy
3079 + data->m12 * cx + data->dy) * fixed_scale);
3080
3081 fx -= half_point;
3082 fy -= half_point;
3083
3084 if (fdy == 0) { // simple scale, no rotation or shear
3085 if (qAbs(t: fdx) <= fixed_scale) {
3086 // simple scale up on X
3087 bilinearFastTransformHelperARGB32PM[tiled][SimpleScaleTransform](b, end, data->texture, fx, fy, fdx, fdy);
3088 } else if (qAbs(t: fdx) <= 2 * fixed_scale) {
3089 // simple scale down on X, less than 2x
3090 const int mid = (length * 2 < BufferSize) ? length : ((length + 1) / 2);
3091 bilinearFastTransformHelperARGB32PM[tiled][SimpleScaleTransform](buffer, buffer + mid, data->texture, fx, fy, fdx, fdy);
3092 if (mid != length)
3093 bilinearFastTransformHelperARGB32PM[tiled][SimpleScaleTransform](buffer + mid, buffer + length, data->texture, fx, fy, fdx, fdy);
3094 } else if (qAbs(t: data->m22) < qreal(1./8.)) {
3095 // scale up more than 8x (on Y)
3096 bilinearFastTransformHelperARGB32PM[tiled][UpscaleTransform](b, end, data->texture, fx, fy, fdx, fdy);
3097 } else {
3098 // scale down on X
3099 bilinearFastTransformHelperARGB32PM[tiled][DownscaleTransform](b, end, data->texture, fx, fy, fdx, fdy);
3100 }
3101 } else { // rotation or shear
3102 if (qAbs(t: data->m11) < qreal(1./8.) || qAbs(t: data->m22) < qreal(1./8.) ) {
3103 // if we are zooming more than 8 times, we use 8bit precision for the position.
3104 bilinearFastTransformHelperARGB32PM[tiled][RotateTransform](b, end, data->texture, fx, fy, fdx, fdy);
3105 } else {
3106 // we are zooming less than 8x, use 4bit precision
3107 bilinearFastTransformHelperARGB32PM[tiled][FastRotateTransform](b, end, data->texture, fx, fy, fdx, fdy);
3108 }
3109 }
3110 } else {
3111 const QTextureData &image = data->texture;
3112
3113 const qreal fdx = data->m11;
3114 const qreal fdy = data->m12;
3115 const qreal fdw = data->m13;
3116
3117 qreal fx = data->m21 * cy + data->m11 * cx + data->dx;
3118 qreal fy = data->m22 * cy + data->m12 * cx + data->dy;
3119 qreal fw = data->m23 * cy + data->m13 * cx + data->m33;
3120
3121 while (b < end) {
3122 const qreal iw = fw == 0 ? 1 : 1 / fw;
3123 const qreal px = fx * iw - qreal(0.5);
3124 const qreal py = fy * iw - qreal(0.5);
3125
3126 int x1 = int(px) - (px < 0);
3127 int x2;
3128 int y1 = int(py) - (py < 0);
3129 int y2;
3130
3131 int distx = int((px - x1) * 256);
3132 int disty = int((py - y1) * 256);
3133
3134 fetchTransformedBilinear_pixelBounds<blendType>(image.width, image.x1, image.x2 - 1, x1, x2);
3135 fetchTransformedBilinear_pixelBounds<blendType>(image.height, image.y1, image.y2 - 1, y1, y2);
3136
3137 const uint *s1 = (const uint *)data->texture.scanLine(y: y1);
3138 const uint *s2 = (const uint *)data->texture.scanLine(y: y2);
3139
3140 uint tl = s1[x1];
3141 uint tr = s1[x2];
3142 uint bl = s2[x1];
3143 uint br = s2[x2];
3144
3145 *b = interpolate_4_pixels(tl, tr, bl, br, distx, disty);
3146
3147 fx += fdx;
3148 fy += fdy;
3149 fw += fdw;
3150 //force increment to avoid /0
3151 if (!fw) {
3152 fw += fdw;
3153 }
3154 ++b;
3155 }
3156 }
3157
3158 return buffer;
3159}
3160
3161template<TextureBlendType blendType>
3162static void QT_FASTCALL fetchTransformedBilinear_simple_scale_helper(uint *b, uint *end, const QTextureData &image,
3163 int &fx, int &fy, int fdx, int /*fdy*/)
3164{
3165 const QPixelLayout *layout = &qPixelLayouts[image.format];
3166 const QVector<QRgb> *clut = image.colorTable;
3167 const FetchAndConvertPixelsFunc fetch = layout->fetchToARGB32PM;
3168
3169 int y1 = (fy >> 16);
3170 int y2;
3171 fetchTransformedBilinear_pixelBounds<blendType>(image.height, image.y1, image.y2 - 1, y1, y2);
3172 const uchar *s1 = image.scanLine(y: y1);
3173 const uchar *s2 = image.scanLine(y: y2);
3174
3175 const int disty = (fy & 0x0000ffff) >> 8;
3176 const int idisty = 256 - disty;
3177 const int length = end - b;
3178
3179 // The intermediate buffer is generated in the positive direction
3180 const int adjust = (fdx < 0) ? fdx * length : 0;
3181 const int offset = (fx + adjust) >> 16;
3182 int x = offset;
3183
3184 IntermediateBuffer intermediate;
3185 uint *buf1 = intermediate.buffer_rb;
3186 uint *buf2 = intermediate.buffer_ag;
3187 const uint *ptr1;
3188 const uint *ptr2;
3189
3190 int count = (qint64(length) * qAbs(t: fdx) + fixed_scale - 1) / fixed_scale + 2;
3191 Q_ASSERT(count <= BufferSize + 2);
3192
3193 if (blendType == BlendTransformedBilinearTiled) {
3194 x %= image.width;
3195 if (x < 0)
3196 x += image.width;
3197 int len1 = qMin(a: count, b: image.width - x);
3198 int len2 = qMin(a: x, b: count - len1);
3199
3200 ptr1 = fetch(buf1, s1, x, len1, clut, nullptr);
3201 ptr2 = fetch(buf2, s2, x, len1, clut, nullptr);
3202 for (int i = 0; i < len1; ++i) {
3203 uint t = ptr1[i];
3204 uint b = ptr2[i];
3205 buf1[i] = (((t & 0xff00ff) * idisty + (b & 0xff00ff) * disty) >> 8) & 0xff00ff;
3206 buf2[i] = ((((t >> 8) & 0xff00ff) * idisty + ((b >> 8) & 0xff00ff) * disty) >> 8) & 0xff00ff;
3207 }
3208
3209 if (len2) {
3210 ptr1 = fetch(buf1 + len1, s1, 0, len2, clut, nullptr);
3211 ptr2 = fetch(buf2 + len1, s2, 0, len2, clut, nullptr);
3212 for (int i = 0; i < len2; ++i) {
3213 uint t = ptr1[i];
3214 uint b = ptr2[i];
3215 buf1[i + len1] = (((t & 0xff00ff) * idisty + (b & 0xff00ff) * disty) >> 8) & 0xff00ff;
3216 buf2[i + len1] = ((((t >> 8) & 0xff00ff) * idisty + ((b >> 8) & 0xff00ff) * disty) >> 8) & 0xff00ff;
3217 }
3218 }
3219 // Generate the rest by repeatedly repeating the previous set of pixels
3220 for (int i = image.width; i < count; ++i) {
3221 buf1[i] = buf1[i - image.width];
3222 buf2[i] = buf2[i - image.width];
3223 }
3224 } else {
3225 int start = qMax(a: x, b: image.x1);
3226 int end = qMin(a: x + count, b: image.x2);
3227 int len = qMax(a: 1, b: end - start);
3228 int leading = start - x;
3229
3230 ptr1 = fetch(buf1 + leading, s1, start, len, clut, nullptr);
3231 ptr2 = fetch(buf2 + leading, s2, start, len, clut, nullptr);
3232
3233 for (int i = 0; i < len; ++i) {
3234 uint t = ptr1[i];
3235 uint b = ptr2[i];
3236 buf1[i + leading] = (((t & 0xff00ff) * idisty + (b & 0xff00ff) * disty) >> 8) & 0xff00ff;
3237 buf2[i + leading] = ((((t >> 8) & 0xff00ff) * idisty + ((b >> 8) & 0xff00ff) * disty) >> 8) & 0xff00ff;
3238 }
3239
3240 for (int i = 0; i < leading; ++i) {
3241 buf1[i] = buf1[leading];
3242 buf2[i] = buf2[leading];
3243 }
3244 for (int i = leading + len; i < count; ++i) {
3245 buf1[i] = buf1[i - 1];
3246 buf2[i] = buf2[i - 1];
3247 }
3248 }
3249
3250 // Now interpolate the values from the intermediate.buffer to get the final result.
3251 intermediate_adder(b, end, intermediate, offset, fx, fdx);
3252}
3253
3254
3255template<TextureBlendType blendType, QPixelLayout::BPP bpp, typename T>
3256static void QT_FASTCALL fetchTransformedBilinear_fetcher(T *buf1, T *buf2, const int len, const QTextureData &image,
3257 int fx, int fy, const int fdx, const int fdy)
3258{
3259 const QPixelLayout &layout = qPixelLayouts[image.format];
3260 constexpr bool useFetch = (bpp < QPixelLayout::BPP32);
3261 if (useFetch)
3262 Q_ASSERT(sizeof(T) == sizeof(uint));
3263 else
3264 Q_ASSERT(layout.bpp == bpp);
3265 const FetchPixelFunc fetch1 = (bpp == QPixelLayout::BPPNone) ? qFetchPixel[layout.bpp] : fetchPixel<bpp>;
3266 if (fdy == 0) {
3267 int y1 = (fy >> 16);
3268 int y2;
3269 fetchTransformedBilinear_pixelBounds<blendType>(image.height, image.y1, image.y2 - 1, y1, y2);
3270 const uchar *s1 = image.scanLine(y: y1);
3271 const uchar *s2 = image.scanLine(y: y2);
3272
3273 int i = 0;
3274 if (blendType == BlendTransformedBilinear) {
3275 for (; i < len; ++i) {
3276 int x1 = (fx >> 16);
3277 int x2;
3278 fetchTransformedBilinear_pixelBounds<blendType>(image.width, image.x1, image.x2 - 1, x1, x2);
3279 if (x1 != x2)
3280 break;
3281 if (useFetch) {
3282 buf1[i * 2 + 0] = buf1[i * 2 + 1] = fetch1(s1, x1);
3283 buf2[i * 2 + 0] = buf2[i * 2 + 1] = fetch1(s2, x1);
3284 } else {
3285 buf1[i * 2 + 0] = buf1[i * 2 + 1] = reinterpret_cast<const T *>(s1)[x1];
3286 buf2[i * 2 + 0] = buf2[i * 2 + 1] = reinterpret_cast<const T *>(s2)[x1];
3287 }
3288 fx += fdx;
3289 }
3290 int fastLen = len;
3291 if (fdx > 0)
3292 fastLen = qMin(a: fastLen, b: int((qint64(image.x2 - 1) * fixed_scale - fx) / fdx));
3293 else if (fdx < 0)
3294 fastLen = qMin(a: fastLen, b: int((qint64(image.x1) * fixed_scale - fx) / fdx));
3295
3296 for (; i < fastLen; ++i) {
3297 int x = (fx >> 16);
3298 if (useFetch) {
3299 buf1[i * 2 + 0] = fetch1(s1, x);
3300 buf1[i * 2 + 1] = fetch1(s1, x + 1);
3301 buf2[i * 2 + 0] = fetch1(s2, x);
3302 buf2[i * 2 + 1] = fetch1(s2, x + 1);
3303 } else {
3304 buf1[i * 2 + 0] = reinterpret_cast<const T *>(s1)[x];
3305 buf1[i * 2 + 1] = reinterpret_cast<const T *>(s1)[x + 1];
3306 buf2[i * 2 + 0] = reinterpret_cast<const T *>(s2)[x];
3307 buf2[i * 2 + 1] = reinterpret_cast<const T *>(s2)[x + 1];
3308 }
3309 fx += fdx;
3310 }
3311 }
3312
3313 for (; i < len; ++i) {
3314 int x1 = (fx >> 16);
3315 int x2;
3316 fetchTransformedBilinear_pixelBounds<blendType>(image.width, image.x1, image.x2 - 1, x1, x2);
3317 if (useFetch) {
3318 buf1[i * 2 + 0] = fetch1(s1, x1);
3319 buf1[i * 2 + 1] = fetch1(s1, x2);
3320 buf2[i * 2 + 0] = fetch1(s2, x1);
3321 buf2[i * 2 + 1] = fetch1(s2, x2);
3322 } else {
3323 buf1[i * 2 + 0] = reinterpret_cast<const T *>(s1)[x1];
3324 buf1[i * 2 + 1] = reinterpret_cast<const T *>(s1)[x2];
3325 buf2[i * 2 + 0] = reinterpret_cast<const T *>(s2)[x1];
3326 buf2[i * 2 + 1] = reinterpret_cast<const T *>(s2)[x2];
3327 }
3328 fx += fdx;
3329 }
3330 } else {
3331 int i = 0;
3332 if (blendType == BlendTransformedBilinear) {
3333 for (; i < len; ++i) {
3334 int x1 = (fx >> 16);
3335 int x2;
3336 int y1 = (fy >> 16);
3337 int y2;
3338 fetchTransformedBilinear_pixelBounds<blendType>(image.width, image.x1, image.x2 - 1, x1, x2);
3339 fetchTransformedBilinear_pixelBounds<blendType>(image.height, image.y1, image.y2 - 1, y1, y2);
3340 if (x1 != x2 && y1 != y2)
3341 break;
3342 const uchar *s1 = image.scanLine(y: y1);
3343 const uchar *s2 = image.scanLine(y: y2);
3344 if (useFetch) {
3345 buf1[i * 2 + 0] = fetch1(s1, x1);
3346 buf1[i * 2 + 1] = fetch1(s1, x2);
3347 buf2[i * 2 + 0] = fetch1(s2, x1);
3348 buf2[i * 2 + 1] = fetch1(s2, x2);
3349 } else {
3350 buf1[i * 2 + 0] = reinterpret_cast<const T *>(s1)[x1];
3351 buf1[i * 2 + 1] = reinterpret_cast<const T *>(s1)[x2];
3352 buf2[i * 2 + 0] = reinterpret_cast<const T *>(s2)[x1];
3353 buf2[i * 2 + 1] = reinterpret_cast<const T *>(s2)[x2];
3354 }
3355 fx += fdx;
3356 fy += fdy;
3357 }
3358 int fastLen = len;
3359 if (fdx > 0)
3360 fastLen = qMin(a: fastLen, b: int((qint64(image.x2 - 1) * fixed_scale - fx) / fdx));
3361 else if (fdx < 0)
3362 fastLen = qMin(a: fastLen, b: int((qint64(image.x1) * fixed_scale - fx) / fdx));
3363 if (fdy > 0)
3364 fastLen = qMin(a: fastLen, b: int((qint64(image.y2 - 1) * fixed_scale - fy) / fdy));
3365 else if (fdy < 0)
3366 fastLen = qMin(a: fastLen, b: int((qint64(image.y1) * fixed_scale - fy) / fdy));
3367
3368 for (; i < fastLen; ++i) {
3369 int x = (fx >> 16);
3370 int y = (fy >> 16);
3371 const uchar *s1 = image.scanLine(y);
3372 const uchar *s2 = s1 + image.bytesPerLine;
3373 if (useFetch) {
3374 buf1[i * 2 + 0] = fetch1(s1, x);
3375 buf1[i * 2 + 1] = fetch1(s1, x + 1);
3376 buf2[i * 2 + 0] = fetch1(s2, x);
3377 buf2[i * 2 + 1] = fetch1(s2, x + 1);
3378 } else {
3379 buf1[i * 2 + 0] = reinterpret_cast<const T *>(s1)[x];
3380 buf1[i * 2 + 1] = reinterpret_cast<const T *>(s1)[x + 1];
3381 buf2[i * 2 + 0] = reinterpret_cast<const T *>(s2)[x];
3382 buf2[i * 2 + 1] = reinterpret_cast<const T *>(s2)[x + 1];
3383 }
3384 fx += fdx;
3385 fy += fdy;
3386 }
3387 }
3388
3389 for (; i < len; ++i) {
3390 int x1 = (fx >> 16);
3391 int x2;
3392 int y1 = (fy >> 16);
3393 int y2;
3394 fetchTransformedBilinear_pixelBounds<blendType>(image.width, image.x1, image.x2 - 1, x1, x2);
3395 fetchTransformedBilinear_pixelBounds<blendType>(image.height, image.y1, image.y2 - 1, y1, y2);
3396
3397 const uchar *s1 = image.scanLine(y: y1);
3398 const uchar *s2 = image.scanLine(y: y2);
3399 if (useFetch) {
3400 buf1[i * 2 + 0] = fetch1(s1, x1);
3401 buf1[i * 2 + 1] = fetch1(s1, x2);
3402 buf2[i * 2 + 0] = fetch1(s2, x1);
3403 buf2[i * 2 + 1] = fetch1(s2, x2);
3404 } else {
3405 buf1[i * 2 + 0] = reinterpret_cast<const T *>(s1)[x1];
3406 buf1[i * 2 + 1] = reinterpret_cast<const T *>(s1)[x2];
3407 buf2[i * 2 + 0] = reinterpret_cast<const T *>(s2)[x1];
3408 buf2[i * 2 + 1] = reinterpret_cast<const T *>(s2)[x2];
3409 }
3410 fx += fdx;
3411 fy += fdy;
3412 }
3413 }
3414}
3415
3416// blendType = BlendTransformedBilinear or BlendTransformedBilinearTiled
3417template<TextureBlendType blendType, QPixelLayout::BPP bpp>
3418static const uint *QT_FASTCALL fetchTransformedBilinear(uint *buffer, const Operator *,
3419 const QSpanData *data, int y, int x, int length)
3420{
3421 const QPixelLayout *layout = &qPixelLayouts[data->texture.format];
3422 const QVector<QRgb> *clut = data->texture.colorTable;
3423 Q_ASSERT(bpp == QPixelLayout::BPPNone || layout->bpp == bpp);
3424
3425 const qreal cx = x + qreal(0.5);
3426 const qreal cy = y + qreal(0.5);
3427
3428 if (canUseFastMatrixPath(cx, cy, length, data)) {
3429 // The increment pr x in the scanline
3430 int fdx = (int)(data->m11 * fixed_scale);
3431 int fdy = (int)(data->m12 * fixed_scale);
3432
3433 int fx = int((data->m21 * cy + data->m11 * cx + data->dx) * fixed_scale);
3434 int fy = int((data->m22 * cy + data->m12 * cx + data->dy) * fixed_scale);
3435
3436 fx -= half_point;
3437 fy -= half_point;
3438
3439 if (fdy == 0) { // simple scale, no rotation or shear
3440 if (qAbs(t: fdx) <= fixed_scale) { // scale up on X
3441 fetchTransformedBilinear_simple_scale_helper<blendType>(buffer, buffer + length, data->texture, fx, fy, fdx, fdy);
3442 } else if (qAbs(t: fdx) <= 2 * fixed_scale) { // scale down on X less than 2x
3443 const int mid = (length * 2 < BufferSize) ? length : ((length + 1) / 2);
3444 fetchTransformedBilinear_simple_scale_helper<blendType>(buffer, buffer + mid, data->texture, fx, fy, fdx, fdy);
3445 if (mid != length)
3446 fetchTransformedBilinear_simple_scale_helper<blendType>(buffer + mid, buffer + length, data->texture, fx, fy, fdx, fdy);
3447 } else {
3448 const auto fetcher = fetchTransformedBilinear_fetcher<blendType,bpp,uint>;
3449
3450 uint buf1[BufferSize];
3451 uint buf2[BufferSize];
3452 uint *b = buffer;
3453 while (length) {
3454 int len = qMin(a: length, b: BufferSize / 2);
3455 fetcher(buf1, buf2, len, data->texture, fx, fy, fdx, 0);
3456 layout->convertToARGB32PM(buf1, len * 2, clut);
3457 layout->convertToARGB32PM(buf2, len * 2, clut);
3458
3459 if (hasFastInterpolate4() || qAbs(t: data->m22) < qreal(1./8.)) { // scale up more than 8x (on Y)
3460 int disty = (fy & 0x0000ffff) >> 8;
3461 for (int i = 0; i < len; ++i) {
3462 int distx = (fx & 0x0000ffff) >> 8;
3463 b[i] = interpolate_4_pixels(t: buf1 + i * 2, b: buf2 + i * 2, distx, disty);
3464 fx += fdx;
3465 }
3466 } else {
3467 int disty = ((fy & 0x0000ffff) + 0x0800) >> 12;
3468 for (int i = 0; i < len; ++i) {
3469 uint tl = buf1[i * 2 + 0];
3470 uint tr = buf1[i * 2 + 1];
3471 uint bl = buf2[i * 2 + 0];
3472 uint br = buf2[i * 2 + 1];
3473 int distx = ((fx & 0x0000ffff) + 0x0800) >> 12;
3474 b[i] = interpolate_4_pixels_16(tl, tr, bl, br, distx, disty);
3475 fx += fdx;
3476 }
3477 }
3478 length -= len;
3479 b += len;
3480 }
3481 }
3482 } else { // rotation or shear
3483 const auto fetcher = fetchTransformedBilinear_fetcher<blendType,bpp,uint>;
3484
3485 uint buf1[BufferSize];
3486 uint buf2[BufferSize];
3487 uint *b = buffer;
3488 while (length) {
3489 int len = qMin(a: length, b: BufferSize / 2);
3490 fetcher(buf1, buf2, len, data->texture, fx, fy, fdx, fdy);
3491 layout->convertToARGB32PM(buf1, len * 2, clut);
3492 layout->convertToARGB32PM(buf2, len * 2, clut);
3493
3494 if (hasFastInterpolate4() || qAbs(t: data->m11) < qreal(1./8.) || qAbs(t: data->m22) < qreal(1./8.)) {
3495 // If we are zooming more than 8 times, we use 8bit precision for the position.
3496 for (int i = 0; i < len; ++i) {
3497 int distx = (fx & 0x0000ffff) >> 8;
3498 int disty = (fy & 0x0000ffff) >> 8;
3499
3500 b[i] = interpolate_4_pixels(t: buf1 + i * 2, b: buf2 + i * 2, distx, disty);
3501 fx += fdx;
3502 fy += fdy;
3503 }
3504 } else {
3505 // We are zooming less than 8x, use 4bit precision
3506 for (int i = 0; i < len; ++i) {
3507 uint tl = buf1[i * 2 + 0];
3508 uint tr = buf1[i * 2 + 1];
3509 uint bl = buf2[i * 2 + 0];
3510 uint br = buf2[i * 2 + 1];
3511
3512 int distx = ((fx & 0x0000ffff) + 0x0800) >> 12;
3513 int disty = ((fy & 0x0000ffff) + 0x0800) >> 12;
3514
3515 b[i] = interpolate_4_pixels_16(tl, tr, bl, br, distx, disty);
3516 fx += fdx;
3517 fy += fdy;
3518 }
3519 }
3520
3521 length -= len;
3522 b += len;
3523 }
3524 }
3525 } else {
3526 // When templated 'fetch' should be inlined at compile time:
3527 const FetchPixelFunc fetch1 = (bpp == QPixelLayout::BPPNone) ? qFetchPixel[layout->bpp] : fetchPixel<bpp>;
3528
3529 const QTextureData &image = data->texture;
3530
3531 const qreal fdx = data->m11;
3532 const qreal fdy = data->m12;
3533 const qreal fdw = data->m13;
3534
3535 qreal fx = data->m21 * cy + data->m11 * cx + data->dx;
3536 qreal fy = data->m22 * cy + data->m12 * cx + data->dy;
3537 qreal fw = data->m23 * cy + data->m13 * cx + data->m33;
3538
3539 uint buf1[BufferSize];
3540 uint buf2[BufferSize];
3541 uint *b = buffer;
3542
3543 int distxs[BufferSize / 2];
3544 int distys[BufferSize / 2];
3545
3546 while (length) {
3547 int len = qMin(a: length, b: BufferSize / 2);
3548 for (int i = 0; i < len; ++i) {
3549 const qreal iw = fw == 0 ? 1 : 1 / fw;
3550 const qreal px = fx * iw - qreal(0.5);
3551 const qreal py = fy * iw - qreal(0.5);
3552
3553 int x1 = int(px) - (px < 0);
3554 int x2;
3555 int y1 = int(py) - (py < 0);
3556 int y2;
3557
3558 distxs[i] = int((px - x1) * 256);
3559 distys[i] = int((py - y1) * 256);
3560
3561 fetchTransformedBilinear_pixelBounds<blendType>(image.width, image.x1, image.x2 - 1, x1, x2);
3562 fetchTransformedBilinear_pixelBounds<blendType>(image.height, image.y1, image.y2 - 1, y1, y2);
3563
3564 const uchar *s1 = data->texture.scanLine(y: y1);
3565 const uchar *s2 = data->texture.scanLine(y: y2);
3566 buf1[i * 2 + 0] = fetch1(s1, x1);
3567 buf1[i * 2 + 1] = fetch1(s1, x2);
3568 buf2[i * 2 + 0] = fetch1(s2, x1);
3569 buf2[i * 2 + 1] = fetch1(s2, x2);
3570
3571 fx += fdx;
3572 fy += fdy;
3573 fw += fdw;
3574 //force increment to avoid /0
3575 if (!fw)
3576 fw += fdw;
3577 }
3578
3579 layout->convertToARGB32PM(buf1, len * 2, clut);
3580 layout->convertToARGB32PM(buf2, len * 2, clut);
3581
3582 for (int i = 0; i < len; ++i) {
3583 int distx = distxs[i];
3584 int disty = distys[i];
3585
3586 b[i] = interpolate_4_pixels(t: buf1 + i * 2, b: buf2 + i * 2, distx, disty);
3587 }
3588 length -= len;
3589 b += len;
3590 }
3591 }
3592
3593 return buffer;
3594}
3595
3596#if QT_CONFIG(raster_64bit)
3597template<TextureBlendType blendType>
3598static const QRgba64 *QT_FASTCALL fetchTransformedBilinear64_uint32(QRgba64 *buffer, const QSpanData *data,
3599 int y, int x, int length)
3600{
3601 const QTextureData &texture = data->texture;
3602 const QPixelLayout *layout = &qPixelLayouts[texture.format];
3603 const QVector<QRgb> *clut = data->texture.colorTable;
3604
3605 const qreal cx = x + qreal(0.5);
3606 const qreal cy = y + qreal(0.5);
3607
3608 uint sbuf1[BufferSize];
3609 uint sbuf2[BufferSize];
3610 alignas(8) QRgba64 buf1[BufferSize];
3611 alignas(8) QRgba64 buf2[BufferSize];
3612 QRgba64 *end = buffer + length;
3613 QRgba64 *b = buffer;
3614
3615 if (canUseFastMatrixPath(cx, cy, length, data)) {
3616 // The increment pr x in the scanline
3617 const int fdx = (int)(data->m11 * fixed_scale);
3618 const int fdy = (int)(data->m12 * fixed_scale);
3619
3620 int fx = int((data->m21 * cy + data->m11 * cx + data->dx) * fixed_scale);
3621 int fy = int((data->m22 * cy + data->m12 * cx + data->dy) * fixed_scale);
3622
3623 fx -= half_point;
3624 fy -= half_point;
3625
3626 const auto fetcher =
3627 (layout->bpp == QPixelLayout::BPP32)
3628 ? fetchTransformedBilinear_fetcher<blendType, QPixelLayout::BPP32, uint>
3629 : fetchTransformedBilinear_fetcher<blendType, QPixelLayout::BPPNone, uint>;
3630
3631 if (fdy == 0) { //simple scale, no rotation
3632 while (length) {
3633 int len = qMin(a: length, b: BufferSize / 2);
3634 int disty = (fy & 0x0000ffff);
3635#if defined(__SSE2__)
3636 const __m128i vdy = _mm_set1_epi16(w: disty);
3637 const __m128i vidy = _mm_set1_epi16(w: 0x10000 - disty);
3638#endif
3639 fetcher(sbuf1, sbuf2, len, data->texture, fx, fy, fdx, fdy);
3640
3641 layout->convertToRGBA64PM(buf1, sbuf1, len * 2, clut, nullptr);
3642 if (disty)
3643 layout->convertToRGBA64PM(buf2, sbuf2, len * 2, clut, nullptr);
3644
3645 for (int i = 0; i < len; ++i) {
3646 int distx = (fx & 0x0000ffff);
3647#if defined(__SSE2__)
3648 __m128i vt = _mm_loadu_si128(p: (const __m128i*)(buf1 + i*2));
3649 if (disty) {
3650 __m128i vb = _mm_loadu_si128(p: (const __m128i*)(buf2 + i*2));
3651 vt = _mm_mulhi_epu16(a: vt, b: vidy);
3652 vb = _mm_mulhi_epu16(a: vb, b: vdy);
3653 vt = _mm_add_epi16(a: vt, b: vb);
3654 }
3655 if (distx) {
3656 const __m128i vdistx = _mm_shufflelo_epi16(_mm_cvtsi32_si128(distx), _MM_SHUFFLE(0, 0, 0, 0));
3657 const __m128i vidistx = _mm_shufflelo_epi16(_mm_cvtsi32_si128(0x10000 - distx), _MM_SHUFFLE(0, 0, 0, 0));
3658 vt = _mm_mulhi_epu16(a: vt, b: _mm_unpacklo_epi64(a: vidistx, b: vdistx));
3659 vt = _mm_add_epi16(a: vt, _mm_srli_si128(vt, 8));
3660 }
3661 _mm_storel_epi64(p: (__m128i*)(b+i), a: vt);
3662#else
3663 b[i] = interpolate_4_pixels_rgb64(buf1 + i*2, buf2 + i*2, distx, disty);
3664#endif
3665 fx += fdx;
3666 }
3667 length -= len;
3668 b += len;
3669 }
3670 } else { // rotation or shear
3671 while (b < end) {
3672 int len = qMin(a: length, b: BufferSize / 2);
3673
3674 fetcher(sbuf1, sbuf2, len, data->texture, fx, fy, fdx, fdy);
3675
3676 layout->convertToRGBA64PM(buf1, sbuf1, len * 2, clut, nullptr);
3677 layout->convertToRGBA64PM(buf2, sbuf2, len * 2, clut, nullptr);
3678
3679 for (int i = 0; i < len; ++i) {
3680 int distx = (fx & 0x0000ffff);
3681 int disty = (fy & 0x0000ffff);
3682 b[i] = interpolate_4_pixels_rgb64(t: buf1 + i*2, b: buf2 + i*2, distx, disty);
3683 fx += fdx;
3684 fy += fdy;
3685 }
3686
3687 length -= len;
3688 b += len;
3689 }
3690 }
3691 } else { // !(data->fast_matrix)
3692 const QTextureData &image = data->texture;
3693
3694 const qreal fdx = data->m11;
3695 const qreal fdy = data->m12;
3696 const qreal fdw = data->m13;
3697
3698 qreal fx = data->m21 * cy + data->m11 * cx + data->dx;
3699 qreal fy = data->m22 * cy + data->m12 * cx + data->dy;
3700 qreal fw = data->m23 * cy + data->m13 * cx + data->m33;
3701
3702 FetchPixelFunc fetch = qFetchPixel[layout->bpp];
3703
3704 int distxs[BufferSize / 2];
3705 int distys[BufferSize / 2];
3706
3707 while (b < end) {
3708 int len = qMin(a: length, b: BufferSize / 2);
3709 for (int i = 0; i < len; ++i) {
3710 const qreal iw = fw == 0 ? 1 : 1 / fw;
3711 const qreal px = fx * iw - qreal(0.5);
3712 const qreal py = fy * iw - qreal(0.5);
3713
3714 int x1 = qFloor(v: px);
3715 int x2;
3716 int y1 = qFloor(v: py);
3717 int y2;
3718
3719 distxs[i] = int((px - x1) * (1<<16));
3720 distys[i] = int((py - y1) * (1<<16));
3721
3722 fetchTransformedBilinear_pixelBounds<blendType>(image.width, image.x1, image.x2 - 1, x1, x2);
3723 fetchTransformedBilinear_pixelBounds<blendType>(image.height, image.y1, image.y2 - 1, y1, y2);
3724
3725 const uchar *s1 = texture.scanLine(y: y1);
3726 const uchar *s2 = texture.scanLine(y: y2);
3727
3728 sbuf1[i * 2 + 0] = fetch(s1, x1);
3729 sbuf1[i * 2 + 1] = fetch(s1, x2);
3730 sbuf2[i * 2 + 0] = fetch(s2, x1);
3731 sbuf2[i * 2 + 1] = fetch(s2, x2);
3732
3733 fx += fdx;
3734 fy += fdy;
3735 fw += fdw;
3736 //force increment to avoid /0
3737 if (!fw)
3738 fw += fdw;
3739 }
3740
3741 layout->convertToRGBA64PM(buf1, sbuf1, len * 2, clut, nullptr);
3742 layout->convertToRGBA64PM(buf2, sbuf2, len * 2, clut, nullptr);
3743
3744 for (int i = 0; i < len; ++i) {
3745 int distx = distxs[i];
3746 int disty = distys[i];
3747 b[i] = interpolate_4_pixels_rgb64(t: buf1 + i*2, b: buf2 + i*2, distx, disty);
3748 }
3749
3750 length -= len;
3751 b += len;
3752 }
3753 }
3754 return buffer;
3755}
3756
3757template<TextureBlendType blendType>
3758static const QRgba64 *QT_FASTCALL fetchTransformedBilinear64_uint64(QRgba64 *buffer, const QSpanData *data,
3759 int y, int x, int length)
3760{
3761 const QTextureData &texture = data->texture;
3762 Q_ASSERT(qPixelLayouts[texture.format].bpp == QPixelLayout::BPP64);
3763 const auto convert = (data->texture.format == QImage::Format_RGBA64) ? convertRGBA64ToRGBA64PM : convertRGBA64PMToRGBA64PM;
3764
3765 const qreal cx = x + qreal(0.5);
3766 const qreal cy = y + qreal(0.5);
3767
3768 alignas(8) QRgba64 buf1[BufferSize];
3769 alignas(8) QRgba64 buf2[BufferSize];
3770 QRgba64 *end = buffer + length;
3771 QRgba64 *b = buffer;
3772
3773 if (canUseFastMatrixPath(cx, cy, length, data)) {
3774 // The increment pr x in the scanline
3775 const int fdx = (int)(data->m11 * fixed_scale);
3776 const int fdy = (int)(data->m12 * fixed_scale);
3777
3778 int fx = int((data->m21 * cy + data->m11 * cx + data->dx) * fixed_scale);
3779 int fy = int((data->m22 * cy + data->m12 * cx + data->dy) * fixed_scale);
3780
3781 fx -= half_point;
3782 fy -= half_point;
3783 const auto fetcher = fetchTransformedBilinear_fetcher<blendType, QPixelLayout::BPP64, QRgba64>;
3784
3785 if (fdy == 0) { //simple scale, no rotation
3786 while (length) {
3787 int len = qMin(a: length, b: BufferSize / 2);
3788 int disty = (fy & 0x0000ffff);
3789#if defined(__SSE2__)
3790 const __m128i vdy = _mm_set1_epi16(w: disty);
3791 const __m128i vidy = _mm_set1_epi16(w: 0x10000 - disty);
3792#endif
3793 fetcher(buf1, buf2, len, data->texture, fx, fy, fdx, fdy);
3794
3795 convert(buf1, len * 2);
3796 if (disty)
3797 convert(buf2, len * 2);
3798
3799 for (int i = 0; i < len; ++i) {
3800 int distx = (fx & 0x0000ffff);
3801#if defined(__SSE2__)
3802 __m128i vt = _mm_loadu_si128(p: (const __m128i*)(buf1 + i*2));
3803 if (disty) {
3804 __m128i vb = _mm_loadu_si128(p: (const __m128i*)(buf2 + i*2));
3805 vt = _mm_mulhi_epu16(a: vt, b: vidy);
3806 vb = _mm_mulhi_epu16(a: vb, b: vdy);
3807 vt = _mm_add_epi16(a: vt, b: vb);
3808 }
3809 if (distx) {
3810 const __m128i vdistx = _mm_shufflelo_epi16(_mm_cvtsi32_si128(distx), _MM_SHUFFLE(0, 0, 0, 0));
3811 const __m128i vidistx = _mm_shufflelo_epi16(_mm_cvtsi32_si128(0x10000 - distx), _MM_SHUFFLE(0, 0, 0, 0));
3812 vt = _mm_mulhi_epu16(a: vt, b: _mm_unpacklo_epi64(a: vidistx, b: vdistx));
3813 vt = _mm_add_epi16(a: vt, _mm_srli_si128(vt, 8));
3814 }
3815 _mm_storel_epi64(p: (__m128i*)(b+i), a: vt);
3816#else
3817 b[i] = interpolate_4_pixels_rgb64(buf1 + i*2, buf2 + i*2, distx, disty);
3818#endif
3819 fx += fdx;
3820 }
3821 length -= len;
3822 b += len;
3823 }
3824 } else { // rotation or shear
3825 while (b < end) {
3826 int len = qMin(a: length, b: BufferSize / 2);
3827
3828 fetcher(buf1, buf2, len, data->texture, fx, fy, fdx, fdy);
3829
3830 convert(buf1, len * 2);
3831 convert(buf2, len * 2);
3832
3833 for (int i = 0; i < len; ++i) {
3834 int distx = (fx & 0x0000ffff);
3835 int disty = (fy & 0x0000ffff);
3836 b[i] = interpolate_4_pixels_rgb64(t: buf1 + i*2, b: buf2 + i*2, distx, disty);
3837 fx += fdx;
3838 fy += fdy;
3839 }
3840
3841 length -= len;
3842 b += len;
3843 }
3844 }
3845 } else { // !(data->fast_matrix)
3846 const QTextureData &image = data->texture;
3847
3848 const qreal fdx = data->m11;
3849 const qreal fdy = data->m12;
3850 const qreal fdw = data->m13;
3851
3852 qreal fx = data->m21 * cy + data->m11 * cx + data->dx;
3853 qreal fy = data->m22 * cy + data->m12 * cx + data->dy;
3854 qreal fw = data->m23 * cy + data->m13 * cx + data->m33;
3855
3856 int distxs[BufferSize / 2];
3857 int distys[BufferSize / 2];
3858
3859 while (b < end) {
3860 int len = qMin(a: length, b: BufferSize / 2);
3861 for (int i = 0; i < len; ++i) {
3862 const qreal iw = fw == 0 ? 1 : 1 / fw;
3863 const qreal px = fx * iw - qreal(0.5);
3864 const qreal py = fy * iw - qreal(0.5);
3865
3866 int x1 = int(px) - (px < 0);
3867 int x2;
3868 int y1 = int(py) - (py < 0);
3869 int y2;
3870
3871 distxs[i] = int((px - x1) * (1<<16));
3872 distys[i] = int((py - y1) * (1<<16));
3873
3874 fetchTransformedBilinear_pixelBounds<blendType>(image.width, image.x1, image.x2 - 1, x1, x2);
3875 fetchTransformedBilinear_pixelBounds<blendType>(image.height, image.y1, image.y2 - 1, y1, y2);
3876
3877 const uchar *s1 = texture.scanLine(y: y1);
3878 const uchar *s2 = texture.scanLine(y: y2);
3879
3880 buf1[i * 2 + 0] = reinterpret_cast<const QRgba64 *>(s1)[x1];
3881 buf1[i * 2 + 1] = reinterpret_cast<const QRgba64 *>(s1)[x2];
3882 buf2[i * 2 + 0] = reinterpret_cast<const QRgba64 *>(s2)[x1];
3883 buf2[i * 2 + 1] = reinterpret_cast<const QRgba64 *>(s2)[x2];
3884
3885 fx += fdx;
3886 fy += fdy;
3887 fw += fdw;
3888 //force increment to avoid /0
3889 if (!fw)
3890 fw += fdw;
3891 }
3892
3893 convert(buf1, len * 2);
3894 convert(buf2, len * 2);
3895
3896 for (int i = 0; i < len; ++i) {
3897 int distx = distxs[i];
3898 int disty = distys[i];
3899 b[i] = interpolate_4_pixels_rgb64(t: buf1 + i*2, b: buf2 + i*2, distx, disty);
3900 }
3901
3902 length -= len;
3903 b += len;
3904 }
3905 }
3906 return buffer;
3907}
3908
3909template<TextureBlendType blendType>
3910static const QRgba64 *QT_FASTCALL fetchTransformedBilinear64(QRgba64 *buffer, const Operator *,
3911 const QSpanData *data, int y, int x, int length)
3912{
3913 if (qPixelLayouts[data->texture.format].bpp == QPixelLayout::BPP64)
3914 return fetchTransformedBilinear64_uint64<blendType>(buffer, data, y, x, length);
3915 return fetchTransformedBilinear64_uint32<blendType>(buffer, data, y, x, length);
3916}
3917#endif
3918
// FetchUntransformed can have more specialized methods added depending on SIMD features.
// Table of untransformed source fetchers with one slot per image format
// (QImage::NImageFormats entries). getSourceFetch() indexes it with the
// QImage::Format of the texture. The trailing comment on each entry names the
// format that slot is intended for. The table is deliberately non-const.
static SourceFetchProc sourceFetchUntransformed[QImage::NImageFormats] = {
    nullptr,                    // Invalid
    fetchUntransformed,         // Mono
    fetchUntransformed,         // MonoLsb
    fetchUntransformed,         // Indexed8
    fetchUntransformedARGB32PM, // RGB32
    fetchUntransformed,         // ARGB32
    fetchUntransformedARGB32PM, // ARGB32_Premultiplied
    fetchUntransformedRGB16,    // RGB16
    fetchUntransformed,         // ARGB8565_Premultiplied
    fetchUntransformed,         // RGB666
    fetchUntransformed,         // ARGB6666_Premultiplied
    fetchUntransformed,         // RGB555
    fetchUntransformed,         // ARGB8555_Premultiplied
    fetchUntransformed,         // RGB888
    fetchUntransformed,         // RGB444
    fetchUntransformed,         // ARGB4444_Premultiplied
    fetchUntransformed,         // RGBX8888
    fetchUntransformed,         // RGBA8888
    fetchUntransformed,         // RGBA8888_Premultiplied
    fetchUntransformed,         // Format_BGR30
    fetchUntransformed,         // Format_A2BGR30_Premultiplied
    fetchUntransformed,         // Format_RGB30
    fetchUntransformed,         // Format_A2RGB30_Premultiplied
    fetchUntransformed,         // Alpha8
    fetchUntransformed,         // Grayscale8
    fetchUntransformed,         // RGBX64
    fetchUntransformed,         // RGBA64
    fetchUntransformed,         // RGBA64_Premultiplied
    fetchUntransformed,         // Grayscale16
    fetchUntransformed,         // BGR888
};
3952
// Fallback source fetchers, one per blend type (NBlendTypes entries).
// getSourceFetch() returns an entry from this table when none of its more
// specific format / bits-per-pixel checks matched.
static const SourceFetchProc sourceFetchGeneric[NBlendTypes] = {
    fetchUntransformed,                                                             // Untransformed
    fetchUntransformed,                                                             // Tiled
    fetchTransformed<BlendTransformed, QPixelLayout::BPPNone>,                      // Transformed
    fetchTransformed<BlendTransformedTiled, QPixelLayout::BPPNone>,                 // TransformedTiled
    fetchTransformedBilinear<BlendTransformedBilinear, QPixelLayout::BPPNone>,      // TransformedBilinear
    fetchTransformedBilinear<BlendTransformedBilinearTiled, QPixelLayout::BPPNone>  // TransformedBilinearTiled
};
3961
// Source fetchers, one per blend type, that getSourceFetch() selects for
// Format_RGB32 and Format_ARGB32_Premultiplied textures.
static SourceFetchProc sourceFetchARGB32PM[NBlendTypes] = {
    fetchUntransformedARGB32PM,                                     // Untransformed
    fetchUntransformedARGB32PM,                                     // Tiled
    fetchTransformed<BlendTransformed, QPixelLayout::BPP32>,        // Transformed
    fetchTransformed<BlendTransformedTiled, QPixelLayout::BPP32>,   // TransformedTiled
    fetchTransformedBilinearARGB32PM<BlendTransformedBilinear>,     // Bilinear
    fetchTransformedBilinearARGB32PM<BlendTransformedBilinearTiled> // BilinearTiled
};
3970
// Source fetchers, one per blend type, that getSourceFetch() selects for
// transformed blends of formats whose pixel layout reports QPixelLayout::BPP16.
static SourceFetchProc sourceFetchAny16[NBlendTypes] = {
    fetchUntransformed,                                                           // Untransformed
    fetchUntransformed,                                                           // Tiled
    fetchTransformed<BlendTransformed, QPixelLayout::BPP16>,                      // Transformed
    fetchTransformed<BlendTransformedTiled, QPixelLayout::BPP16>,                 // TransformedTiled
    fetchTransformedBilinear<BlendTransformedBilinear, QPixelLayout::BPP16>,      // TransformedBilinear
    fetchTransformedBilinear<BlendTransformedBilinearTiled, QPixelLayout::BPP16>  // TransformedBilinearTiled
};
3979
// Source fetchers, one per blend type, that getSourceFetch() selects for
// transformed blends of formats whose pixel layout reports QPixelLayout::BPP32
// (other than the two formats served by sourceFetchARGB32PM).
static SourceFetchProc sourceFetchAny32[NBlendTypes] = {
    fetchUntransformed,                                                           // Untransformed
    fetchUntransformed,                                                           // Tiled
    fetchTransformed<BlendTransformed, QPixelLayout::BPP32>,                      // Transformed
    fetchTransformed<BlendTransformedTiled, QPixelLayout::BPP32>,                 // TransformedTiled
    fetchTransformedBilinear<BlendTransformedBilinear, QPixelLayout::BPP32>,      // TransformedBilinear
    fetchTransformedBilinear<BlendTransformedBilinearTiled, QPixelLayout::BPP32>  // TransformedBilinearTiled
};
3988
3989static inline SourceFetchProc getSourceFetch(TextureBlendType blendType, QImage::Format format)
3990{
3991 if (format == QImage::Format_RGB32 || format == QImage::Format_ARGB32_Premultiplied)
3992 return sourceFetchARGB32PM[blendType];
3993 if (blendType == BlendUntransformed || blendType == BlendTiled)
3994 return sourceFetchUntransformed[format];
3995 if (qPixelLayouts[format].bpp == QPixelLayout::BPP16)
3996 return sourceFetchAny16[blendType];
3997 if (qPixelLayouts[format].bpp == QPixelLayout::BPP32)
3998 return sourceFetchAny32[blendType];
3999 return sourceFetchGeneric[blendType];
4000}
4001
4002#if QT_CONFIG(raster_64bit)
// 64-bit source fetchers, one per blend type. getSourceFetch64() returns an
// entry from this table for every format it does not special-case.
static const SourceFetchProc64 sourceFetchGeneric64[NBlendTypes] = {
    fetchUntransformed64,                                     // Untransformed
    fetchUntransformed64,                                     // Tiled
    fetchTransformed64<BlendTransformed>,                     // Transformed
    fetchTransformed64<BlendTransformedTiled>,                // TransformedTiled
    fetchTransformedBilinear64<BlendTransformedBilinear>,     // Bilinear
    fetchTransformedBilinear64<BlendTransformedBilinearTiled> // BilinearTiled
};
4011
// 64-bit source fetchers, one per blend type, that getSourceFetch64() selects
// for Format_RGBX64 and Format_RGBA64_Premultiplied textures. Only the two
// untransformed entries differ from sourceFetchGeneric64.
static const SourceFetchProc64 sourceFetchRGBA64PM[NBlendTypes] = {
    fetchUntransformedRGBA64PM,                               // Untransformed
    fetchUntransformedRGBA64PM,                               // Tiled
    fetchTransformed64<BlendTransformed>,                     // Transformed
    fetchTransformed64<BlendTransformedTiled>,                // TransformedTiled
    fetchTransformedBilinear64<BlendTransformedBilinear>,     // Bilinear
    fetchTransformedBilinear64<BlendTransformedBilinearTiled> // BilinearTiled
};
4020
4021static inline SourceFetchProc64 getSourceFetch64(TextureBlendType blendType, QImage::Format format)
4022{
4023 if (format == QImage::Format_RGBX64 || format == QImage::Format_RGBA64_Premultiplied)
4024 return sourceFetchRGBA64PM[blendType];
4025 return sourceFetchGeneric64[blendType];
4026}
4027#endif
4028
4029
// Fixed-point parameters used by the gradient fetchers below.
// FIXPT_BITS: number of low bits shifted away when a fixed-point position is
//             turned back into a color table index.
// FIXPT_SIZE: scale factor (1 << FIXPT_BITS) applied when converting a
//             floating point position to fixed point.
// FIXPT_MAX:  magnitude bound the linear gradient fetcher checks against
//             before it takes the fixed-point path.
#define FIXPT_BITS 8
#define FIXPT_SIZE (1<<FIXPT_BITS)
#define FIXPT_MAX (INT_MAX >> (FIXPT_BITS + 1))
4033
4034static uint qt_gradient_pixel_fixed(const QGradientData *data, int fixed_pos)
4035{
4036 int ipos = (fixed_pos + (FIXPT_SIZE / 2)) >> FIXPT_BITS;
4037 return data->colorTable32[qt_gradient_clamp(data, ipos)];
4038}
4039
4040#if QT_CONFIG(raster_64bit)
4041static const QRgba64& qt_gradient_pixel64_fixed(const QGradientData *data, int fixed_pos)
4042{
4043 int ipos = (fixed_pos + (FIXPT_SIZE / 2)) >> FIXPT_BITS;
4044 return data->colorTable64[qt_gradient_clamp(data, ipos)];
4045}
4046#endif
4047
4048static void QT_FASTCALL getLinearGradientValues(LinearGradientValues *v, const QSpanData *data)
4049{
4050 v->dx = data->gradient.linear.end.x - data->gradient.linear.origin.x;
4051 v->dy = data->gradient.linear.end.y - data->gradient.linear.origin.y;
4052 v->l = v->dx * v->dx + v->dy * v->dy;
4053 v->off = 0;
4054 if (v->l != 0) {
4055 v->dx /= v->l;
4056 v->dy /= v->l;
4057 v->off = -v->dx * data->gradient.linear.origin.x - v->dy * data->gradient.linear.origin.y;
4058 }
4059}
4060
4061class GradientBase32
4062{
4063public:
4064 typedef uint Type;
4065 static Type null() { return 0; }
4066 static Type fetchSingle(const QGradientData& gradient, qreal v)
4067 {
4068 return qt_gradient_pixel(data: &gradient, pos: v);
4069 }
4070 static Type fetchSingle(const QGradientData& gradient, int v)
4071 {
4072 return qt_gradient_pixel_fixed(data: &gradient, fixed_pos: v);
4073 }
4074 static void memfill(Type *buffer, Type fill, int length)
4075 {
4076 qt_memfill32(buffer, fill, length);
4077 }
4078};
4079
4080#if QT_CONFIG(raster_64bit)
4081class GradientBase64
4082{
4083public:
4084 typedef QRgba64 Type;
4085 static Type null() { return QRgba64::fromRgba64(c: 0); }
4086 static Type fetchSingle(const QGradientData& gradient, qreal v)
4087 {
4088 return qt_gradient_pixel64(data: &gradient, pos: v);
4089 }
4090 static Type fetchSingle(const QGradientData& gradient, int v)
4091 {
4092 return qt_gradient_pixel64_fixed(data: &gradient, fixed_pos: v);
4093 }
4094 static void memfill(Type *buffer, Type fill, int length)
4095 {
4096 qt_memfill64((quint64*)buffer, fill, length);
4097 }
4098};
4099#endif
4100
4101template<class GradientBase, typename BlendType>
4102static inline const BlendType * QT_FASTCALL qt_fetch_linear_gradient_template(
4103 BlendType *buffer, const Operator *op, const QSpanData *data,
4104 int y, int x, int length)
4105{
4106 const BlendType *b = buffer;
4107 qreal t, inc;
4108
4109 bool affine = true;
4110 qreal rx=0, ry=0;
4111 if (op->linear.l == 0) {
4112 t = inc = 0;
4113 } else {
4114 rx = data->m21 * (y + qreal(0.5)) + data->m11 * (x + qreal(0.5)) + data->dx;
4115 ry = data->m22 * (y + qreal(0.5)) + data->m12 * (x + qreal(0.5)) + data->dy;
4116 t = op->linear.dx*rx + op->linear.dy*ry + op->linear.off;
4117 inc = op->linear.dx * data->m11 + op->linear.dy * data->m12;
4118 affine = !data->m13 && !data->m23;
4119
4120 if (affine) {
4121 t *= (GRADIENT_STOPTABLE_SIZE - 1);
4122 inc *= (GRADIENT_STOPTABLE_SIZE - 1);
4123 }
4124 }
4125
4126 const BlendType *end = buffer + length;
4127 if (affine) {
4128 if (inc > qreal(-1e-5) && inc < qreal(1e-5)) {
4129 if (std::abs(x: t) < FIXPT_MAX)
4130 GradientBase::memfill(buffer, GradientBase::fetchSingle(data->gradient, int(t * FIXPT_SIZE)), length);
4131 else
4132 GradientBase::memfill(buffer, GradientBase::fetchSingle(data->gradient, t / GRADIENT_STOPTABLE_SIZE), length);
4133 } else {
4134 if (std::abs(x: t) < FIXPT_MAX && std::abs(x: inc) < FIXPT_MAX && std::abs(x: t + inc * length) < FIXPT_MAX) {
4135 // we can use fixed point math
4136 int t_fixed = int(t * FIXPT_SIZE);
4137 int inc_fixed = int(inc * FIXPT_SIZE);
4138 while (buffer < end) {
4139 *buffer = GradientBase::fetchSingle(data->gradient, t_fixed);
4140 t_fixed += inc_fixed;
4141 ++buffer;
4142 }
4143 } else {
4144 // we have to fall back to float math
4145 while (buffer < end) {
4146 *buffer = GradientBase::fetchSingle(data->gradient, t/GRADIENT_STOPTABLE_SIZE);
4147 t += inc;
4148 ++buffer;
4149 }
4150 }
4151 }
4152 } else { // fall back to float math here as well
4153 qreal rw = data->m23 * (y + qreal(0.5)) + data->m13 * (x + qreal(0.5)) + data->m33;
4154 while (buffer < end) {
4155 qreal x = rx/rw;
4156 qreal y = ry/rw;
4157 t = (op->linear.dx*x + op->linear.dy *y) + op->linear.off;
4158
4159 *buffer = GradientBase::fetchSingle(data->gradient, t);
4160 rx += data->m11;
4161 ry += data->m12;
4162 rw += data->m13;
4163 if (!rw) {
4164 rw += data->m13;
4165 }
4166 ++buffer;
4167 }
4168 }
4169
4170 return b;
4171}
4172
4173static const uint * QT_FASTCALL qt_fetch_linear_gradient(uint *buffer, const Operator *op, const QSpanData *data,
4174 int y, int x, int length)
4175{
4176 return qt_fetch_linear_gradient_template<GradientBase32, uint>(buffer, op, data, y, x, length);
4177}
4178
4179#if QT_CONFIG(raster_64bit)
4180static const QRgba64 * QT_FASTCALL qt_fetch_linear_gradient_rgb64(QRgba64 *buffer, const Operator *op, const QSpanData *data,
4181 int y, int x, int length)
4182{
4183 return qt_fetch_linear_gradient_template<GradientBase64, QRgba64>(buffer, op, data, y, x, length);
4184}
4185#endif
4186
4187static void QT_FASTCALL getRadialGradientValues(RadialGradientValues *v, const QSpanData *data)
4188{
4189 v->dx = data->gradient.radial.center.x - data->gradient.radial.focal.x;
4190 v->dy = data->gradient.radial.center.y - data->gradient.radial.focal.y;
4191
4192 v->dr = data->gradient.radial.center.radius - data->gradient.radial.focal.radius;
4193 v->sqrfr = data->gradient.radial.focal.radius * data->gradient.radial.focal.radius;
4194
4195 v->a = v->dr * v->dr - v->dx*v->dx - v->dy*v->dy;
4196 v->inv2a = 1 / (2 * v->a);
4197
4198 v->extended = !qFuzzyIsNull(d: data->gradient.radial.focal.radius) || v->a <= 0;
4199}
4200
4201template <class GradientBase>
4202class RadialFetchPlain : public GradientBase
4203{
4204public:
4205 typedef typename GradientBase::Type BlendType;
4206 static void fetch(BlendType *buffer, BlendType *end,
4207 const Operator *op, const QSpanData *data, qreal det,
4208 qreal delta_det, qreal delta_delta_det, qreal b, qreal delta_b)
4209 {
4210 if (op->radial.extended) {
4211 while (buffer < end) {
4212 BlendType result = GradientBase::null();
4213 if (det >= 0) {
4214 qreal w = qSqrt(v: det) - b;
4215 if (data->gradient.radial.focal.radius + op->radial.dr * w >= 0)
4216 result = GradientBase::fetchSingle(data->gradient, w);
4217 }
4218
4219 *buffer = result;
4220
4221 det += delta_det;
4222 delta_det += delta_delta_det;
4223 b += delta_b;
4224
4225 ++buffer;
4226 }
4227 } else {
4228 while (buffer < end) {
4229 *buffer++ = GradientBase::fetchSingle(data->gradient, qSqrt(v: det) - b);
4230
4231 det += delta_det;
4232 delta_det += delta_delta_det;
4233 b += delta_b;
4234 }
4235 }
4236 }
4237};
4238
4239const uint * QT_FASTCALL qt_fetch_radial_gradient_plain(uint *buffer, const Operator *op, const QSpanData *data,
4240 int y, int x, int length)
4241{
4242 return qt_fetch_radial_gradient_template<RadialFetchPlain<GradientBase32>, uint>(buffer, op, data, y, x, length);
4243}
4244
// Radial gradient fetcher that getOperator() installs as srcFetch. It is a
// non-const pointer initialized to the plain implementation.
static SourceFetchProc qt_fetch_radial_gradient = qt_fetch_radial_gradient_plain;
4246
4247#if QT_CONFIG(raster_64bit)
4248const QRgba64 * QT_FASTCALL qt_fetch_radial_gradient_rgb64(QRgba64 *buffer, const Operator *op, const QSpanData *data,
4249 int y, int x, int length)
4250{
4251 return qt_fetch_radial_gradient_template<RadialFetchPlain<GradientBase64>, QRgba64>(buffer, op, data, y, x, length);
4252}
4253#endif
4254
4255template <class GradientBase, typename BlendType>
4256static inline const BlendType * QT_FASTCALL qt_fetch_conical_gradient_template(
4257 BlendType *buffer, const QSpanData *data,
4258 int y, int x, int length)
4259{
4260 const BlendType *b = buffer;
4261 qreal rx = data->m21 * (y + qreal(0.5))
4262 + data->dx + data->m11 * (x + qreal(0.5));
4263 qreal ry = data->m22 * (y + qreal(0.5))
4264 + data->dy + data->m12 * (x + qreal(0.5));
4265 bool affine = !data->m13 && !data->m23;
4266
4267 const qreal inv2pi = M_1_PI / 2.0;
4268
4269 const BlendType *end = buffer + length;
4270 if (affine) {
4271 rx -= data->gradient.conical.center.x;
4272 ry -= data->gradient.conical.center.y;
4273 while (buffer < end) {
4274 qreal angle = qAtan2(y: ry, x: rx) + data->gradient.conical.angle;
4275
4276 *buffer = GradientBase::fetchSingle(data->gradient, 1 - angle * inv2pi);
4277
4278 rx += data->m11;
4279 ry += data->m12;
4280 ++buffer;
4281 }
4282 } else {
4283 qreal rw = data->m23 * (y + qreal(0.5))
4284 + data->m33 + data->m13 * (x + qreal(0.5));
4285 if (!rw)
4286 rw = 1;
4287 while (buffer < end) {
4288 qreal angle = qAtan2(y: ry/rw - data->gradient.conical.center.x,
4289 x: rx/rw - data->gradient.conical.center.y)
4290 + data->gradient.conical.angle;
4291
4292 *buffer = GradientBase::fetchSingle(data->gradient, 1 - angle * inv2pi);
4293
4294 rx += data->m11;
4295 ry += data->m12;
4296 rw += data->m13;
4297 if (!rw) {
4298 rw += data->m13;
4299 }
4300 ++buffer;
4301 }
4302 }
4303 return b;
4304}
4305
4306static const uint * QT_FASTCALL qt_fetch_conical_gradient(uint *buffer, const Operator *, const QSpanData *data,
4307 int y, int x, int length)
4308{
4309 return qt_fetch_conical_gradient_template<GradientBase32, uint>(buffer, data, y, x, length);
4310}
4311
4312#if QT_CONFIG(raster_64bit)
4313static const QRgba64 * QT_FASTCALL qt_fetch_conical_gradient_rgb64(QRgba64 *buffer, const Operator *, const QSpanData *data,
4314 int y, int x, int length)
4315{
4316 return qt_fetch_conical_gradient_template<GradientBase64, QRgba64>(buffer, data, y, x, length);
4317}
4318#endif
4319
// Composition function tables defined in another translation unit.
// getOperator() indexes them with the operator's composition mode.
extern CompositionFunctionSolid qt_functionForModeSolid_C[];
extern CompositionFunctionSolid64 qt_functionForModeSolid64_C[];

// Table pointers used for solid-color composition; initialized to the C
// implementations declared above.
static const CompositionFunctionSolid *functionForModeSolid = qt_functionForModeSolid_C;
#if QT_CONFIG(raster_64bit)
static const CompositionFunctionSolid64 *functionForModeSolid64 = qt_functionForModeSolid64_C;
#endif

extern CompositionFunction qt_functionForMode_C[];
extern CompositionFunction64 qt_functionForMode64_C[];

// Table pointers used for source-buffer composition; initialized to the C
// implementations declared above.
static const CompositionFunction *functionForMode = qt_functionForMode_C;
#if QT_CONFIG(raster_64bit)
static const CompositionFunction64 *functionForMode64 = qt_functionForMode64_C;
#endif
4335
4336static TextureBlendType getBlendType(const QSpanData *data)
4337{
4338 TextureBlendType ft;
4339 if (data->txop <= QTransform::TxTranslate)
4340 if (data->texture.type == QTextureData::Tiled)
4341 ft = BlendTiled;
4342 else
4343 ft = BlendUntransformed;
4344 else if (data->bilinear)
4345 if (data->texture.type == QTextureData::Tiled)
4346 ft = BlendTransformedBilinearTiled;
4347 else
4348 ft = BlendTransformedBilinear;
4349 else
4350 if (data->texture.type == QTextureData::Tiled)
4351 ft = BlendTransformedTiled;
4352 else
4353 ft = BlendTransformed;
4354 return ft;
4355}
4356
4357static inline Operator getOperator(const QSpanData *data, const QSpan *spans, int spanCount)
4358{
4359 Operator op;
4360 bool solidSource = false;
4361
4362 switch(data->type) {
4363 case QSpanData::Solid:
4364 solidSource = data->solidColor.isOpaque();
4365 op.srcFetch = nullptr;
4366#if QT_CONFIG(raster_64bit)
4367 op.srcFetch64 = nullptr;
4368#endif
4369 break;
4370 case QSpanData::LinearGradient:
4371 solidSource = !data->gradient.alphaColor;
4372 getLinearGradientValues(v: &op.linear, data);
4373 op.srcFetch = qt_fetch_linear_gradient;
4374#if QT_CONFIG(raster_64bit)
4375 op.srcFetch64 = qt_fetch_linear_gradient_rgb64;
4376#endif
4377 break;
4378 case QSpanData::RadialGradient:
4379 solidSource = !data->gradient.alphaColor;
4380 getRadialGradientValues(v: &op.radial, data);
4381 op.srcFetch = qt_fetch_radial_gradient;
4382#if QT_CONFIG(raster_64bit)
4383 op.srcFetch64 = qt_fetch_radial_gradient_rgb64;
4384#endif
4385 break;
4386 case QSpanData::ConicalGradient:
4387 solidSource = !data->gradient.alphaColor;
4388 op.srcFetch = qt_fetch_conical_gradient;
4389#if QT_CONFIG(raster_64bit)
4390 op.srcFetch64 = qt_fetch_conical_gradient_rgb64;
4391#endif
4392 break;
4393 case QSpanData::Texture:
4394 solidSource = !data->texture.hasAlpha;
4395 op.srcFetch = getSourceFetch(blendType: getBlendType(data), format: data->texture.format);
4396#if QT_CONFIG(raster_64bit)
4397 op.srcFetch64 = getSourceFetch64(blendType: getBlendType(data), format: data->texture.format);;
4398#endif
4399 break;
4400 default:
4401 Q_UNREACHABLE();
4402 break;
4403 }
4404#if !QT_CONFIG(raster_64bit)
4405 op.srcFetch64 = 0;
4406#endif
4407
4408 op.mode = data->rasterBuffer->compositionMode;
4409 if (op.mode == QPainter::CompositionMode_SourceOver && solidSource)
4410 op.mode = QPainter::CompositionMode_Source;
4411
4412 op.destFetch = destFetchProc[data->rasterBuffer->format];
4413#if QT_CONFIG(raster_64bit)
4414 op.destFetch64 = destFetchProc64[data->rasterBuffer->format];
4415#else
4416 op.destFetch64 = 0;
4417#endif
4418 if (op.mode == QPainter::CompositionMode_Source &&
4419 (data->type != QSpanData::Texture || data->texture.const_alpha == 256)) {
4420 const QSpan *lastSpan = spans + spanCount;
4421 bool alphaSpans = false;
4422 while (spans < lastSpan) {
4423 if (spans->coverage != 255) {
4424 alphaSpans = true;
4425 break;
4426 }
4427 ++spans;
4428 }
4429 if (!alphaSpans && spanCount > 0) {
4430 // If all spans are opaque we do not need to fetch dest.
4431 // But don't clear passthrough destFetch as they are just as fast and save destStore.
4432 if (op.destFetch != destFetchARGB32P)
4433 op.destFetch = destFetchUndefined;
4434#if QT_CONFIG(raster_64bit)
4435 if (op.destFetch64 != destFetchRGB64)
4436 op.destFetch64 = destFetch64Undefined;
4437#endif
4438 }
4439 }
4440
4441 op.destStore = destStoreProc[data->rasterBuffer->format];
4442 op.funcSolid = functionForModeSolid[op.mode];
4443 op.func = functionForMode[op.mode];
4444#if QT_CONFIG(raster_64bit)
4445 op.destStore64 = destStoreProc64[data->rasterBuffer->format];
4446 op.funcSolid64 = functionForModeSolid64[op.mode];
4447 op.func64 = functionForMode64[op.mode];
4448#else
4449 op.destStore64 = 0;
4450 op.funcSolid64 = 0;
4451 op.func64 = 0;
4452#endif
4453
4454 return op;
4455}
4456
4457static void spanfill_from_first(QRasterBuffer *rasterBuffer, QPixelLayout::BPP bpp, int x, int y, int length)
4458{
4459 switch (bpp) {
4460 case QPixelLayout::BPP64: {
4461 quint64 *dest = reinterpret_cast<quint64 *>(rasterBuffer->scanLine(y)) + x;
4462 qt_memfill_template(dest: dest + 1, color: dest[0], count: length - 1);
4463 break;
4464 }
4465 case QPixelLayout::BPP32: {
4466 quint32 *dest = reinterpret_cast<quint32 *>(rasterBuffer->scanLine(y)) + x;
4467 qt_memfill_template(dest: dest + 1, color: dest[0], count: length - 1);
4468 break;
4469 }
4470 case QPixelLayout::BPP24: {
4471 quint24 *dest = reinterpret_cast<quint24 *>(rasterBuffer->scanLine(y)) + x;
4472 qt_memfill_template(dest: dest + 1, color: dest[0], count: length - 1);
4473 break;
4474 }
4475 case QPixelLayout::BPP16: {
4476 quint16 *dest = reinterpret_cast<quint16 *>(rasterBuffer->scanLine(y)) + x;
4477 qt_memfill_template(dest: dest + 1, color: dest[0], count: length - 1);
4478 break;
4479 }
4480 case QPixelLayout::BPP8: {
4481 uchar *dest = rasterBuffer->scanLine(y) + x;
4482 memset(s: dest + 1, c: dest[0], n: length - 1);
4483 break;
4484 }
4485 default:
4486 Q_UNREACHABLE();
4487 }
4488}
4489
4490
4491// -------------------- blend methods ---------------------
4492
4493static void blend_color_generic(int count, const QSpan *spans, void *userData)
4494{
4495 QSpanData *data = reinterpret_cast<QSpanData *>(userData);
4496 uint buffer[BufferSize];
4497 Operator op = getOperator(data, spans: nullptr, spanCount: 0);
4498 const uint color = data->solidColor.toArgb32();
4499 const bool solidFill = op.mode == QPainter::CompositionMode_Source;
4500 const QPixelLayout::BPP bpp = qPixelLayouts[data->rasterBuffer->format].bpp;
4501
4502 while (count--) {
4503 int x = spans->x;
4504 int length = spans->len;
4505 if (solidFill && bpp >= QPixelLayout::BPP8 && spans->coverage == 255 && length) {
4506 // If dest doesn't matter we don't need to bother with blending or converting all the identical pixels
4507 op.destStore(data->rasterBuffer, x, spans->y, &color, 1);
4508 spanfill_from_first(rasterBuffer: data->rasterBuffer, bpp, x, y: spans->y, length);
4509 length = 0;
4510 }
4511
4512 while (length) {
4513 int l = qMin(a: BufferSize, b: length);
4514 uint *dest = op.destFetch(buffer, data->rasterBuffer, x, spans->y, l);
4515 op.funcSolid(dest, l, color, spans->coverage);
4516 if (op.destStore)
4517 op.destStore(data->rasterBuffer, x, spans->y, dest, l);
4518 length -= l;
4519 x += l;
4520 }
4521 ++spans;
4522 }
4523}
4524
4525static void blend_color_argb(int count, const QSpan *spans, void *userData)
4526{
4527 QSpanData *data = reinterpret_cast<QSpanData *>(userData);
4528
4529 const Operator op = getOperator(data, spans: nullptr, spanCount: 0);
4530 const uint color = data->solidColor.toArgb32();
4531
4532 if (op.mode == QPainter::CompositionMode_Source) {
4533 // inline for performance
4534 while (count--) {
4535 uint *target = ((uint *)data->rasterBuffer->scanLine(y: spans->y)) + spans->x;
4536 if (spans->coverage == 255) {
4537 qt_memfill(dest: target, color, count: spans->len);
4538#ifdef __SSE2__
4539 } else if (spans->len > 16) {
4540 op.funcSolid(target, spans->len, color, spans->coverage);
4541#endif
4542 } else {
4543 uint c = BYTE_MUL(x: color, a: spans->coverage);
4544 int ialpha = 255 - spans->coverage;
4545 for (int i = 0; i < spans->len; ++i)
4546 target[i] = c + BYTE_MUL(x: target[i], a: ialpha);
4547 }
4548 ++spans;
4549 }
4550 return;
4551 }
4552
4553 while (count--) {
4554 uint *target = ((uint *)data->rasterBuffer->scanLine(y: spans->y)) + spans->x;
4555 op.funcSolid(target, spans->len, color, spans->coverage);
4556 ++spans;
4557 }
4558}
4559
4560void blend_color_generic_rgb64(int count, const QSpan *spans, void *userData)
4561{
4562#if QT_CONFIG(raster_64bit)
4563 QSpanData *data = reinterpret_cast<QSpanData *>(userData);
4564 Operator op = getOperator(data, spans: nullptr, spanCount: 0);
4565 if (!op.funcSolid64) {
4566 qCDebug(lcQtGuiDrawHelper, "blend_color_generic_rgb64: unsupported 64bit blend attempted, falling back to 32-bit");
4567 return blend_color_generic(count, spans, userData);
4568 }
4569
4570 alignas(8) QRgba64 buffer[BufferSize];
4571 const QRgba64 color = data->solidColor;
4572 const bool solidFill = op.mode == QPainter::CompositionMode_Source;
4573 const QPixelLayout::BPP bpp = qPixelLayouts[data->rasterBuffer->format].bpp;
4574
4575 while (count--) {
4576 int x = spans->x;
4577 int length = spans->len;
4578 if (solidFill && bpp >= QPixelLayout::BPP8 && spans->coverage == 255 && length && op.destStore64) {
4579 // If dest doesn't matter we don't need to bother with blending or converting all the identical pixels
4580 op.destStore64(data->rasterBuffer, x, spans->y, &color, 1);
4581 spanfill_from_first(rasterBuffer: data->rasterBuffer, bpp, x, y: spans->y, length);
4582 length = 0;
4583 }
4584
4585 while (length) {
4586 int l = qMin(a: BufferSize, b: length);
4587 QRgba64 *dest = op.destFetch64(buffer, data->rasterBuffer, x, spans->y, l);
4588 op.funcSolid64(dest, l, color, spans->coverage);
4589 if (op.destStore64)
4590 op.destStore64(data->rasterBuffer, x, spans->y, dest, l);
4591 length -= l;
4592 x += l;
4593 }
4594 ++spans;
4595 }
4596#else
4597 blend_color_generic(count, spans, userData);
4598#endif
4599}
4600
4601static void blend_color_rgb16(int count, const QSpan *spans, void *userData)
4602{
4603 QSpanData *data = reinterpret_cast<QSpanData *>(userData);
4604
4605 /*
4606 We duplicate a little logic from getOperator() and calculate the
4607 composition mode directly. This allows blend_color_rgb16 to be used
4608 from qt_gradient_quint16 with minimal overhead.
4609 */
4610 QPainter::CompositionMode mode = data->rasterBuffer->compositionMode;
4611 if (mode == QPainter::CompositionMode_SourceOver && data->solidColor.isOpaque())
4612 mode = QPainter::CompositionMode_Source;
4613
4614 if (mode == QPainter::CompositionMode_Source) {
4615 // inline for performance
4616 ushort c = data->solidColor.toRgb16();
4617 for (; count--; spans++) {
4618 if (!spans->len)
4619 continue;
4620 ushort *target = ((ushort *)data->rasterBuffer->scanLine(y: spans->y)) + spans->x;
4621 if (spans->coverage == 255) {
4622 qt_memfill(dest: target, color: c, count: spans->len);
4623 } else {
4624 ushort color = BYTE_MUL_RGB16(x: c, a: spans->coverage);
4625 int ialpha = 255 - spans->coverage;
4626 const ushort *end = target + spans->len;
4627 while (target < end) {
4628 *target = color + BYTE_MUL_RGB16(x: *target, a: ialpha);
4629 ++target;
4630 }
4631 }
4632 }
4633 return;
4634 }
4635
4636 if (mode == QPainter::CompositionMode_SourceOver) {
4637 for (; count--; spans++) {
4638 if (!spans->len)
4639 continue;
4640 uint color = BYTE_MUL(x: data->solidColor.toArgb32(), a: spans->coverage);
4641 int ialpha = qAlpha(rgb: ~color);
4642 ushort c = qConvertRgb32To16(c: color);
4643 ushort *target = ((ushort *)data->rasterBuffer->scanLine(y: spans->y)) + spans->x;
4644 int len = spans->len;
4645 bool pre = (((quintptr)target) & 0x3) != 0;
4646 bool post = false;
4647 if (pre) {
4648 // skip to word boundary
4649 *target = c + BYTE_MUL_RGB16(x: *target, a: ialpha);
4650 ++target;
4651 --len;
4652 }
4653 if (len & 0x1) {
4654 post = true;
4655 --len;
4656 }
4657 uint *target32 = (uint*)target;
4658 uint c32 = c | (c<<16);
4659 len >>= 1;
4660 uint salpha = (ialpha+1) >> 3; // calculate here rather than in loop
4661 while (len--) {
4662 // blend full words
4663 *target32 = c32 + BYTE_MUL_RGB16_32(x: *target32, a: salpha);
4664 ++target32;
4665 target += 2;
4666 }
4667 if (post) {
4668 // one last pixel beyond a full word
4669 *target = c + BYTE_MUL_RGB16(x: *target, a: ialpha);
4670 }
4671 }
4672 return;
4673 }
4674
4675 blend_color_generic(count, spans, userData);
4676}
4677
4678template <typename T>
4679void handleSpans(int count, const QSpan *spans, const QSpanData *data, T &handler)
4680{
4681 uint const_alpha = 256;
4682 if (data->type == QSpanData::Texture)
4683 const_alpha = data->texture.const_alpha;
4684
4685 int coverage = 0;
4686 while (count) {
4687 if (!spans->len) {
4688 ++spans;
4689 --count;
4690 continue;
4691 }
4692 int x = spans->x;
4693 const int y = spans->y;
4694 int right = x + spans->len;
4695
4696 // compute length of adjacent spans
4697 for (int i = 1; i < count && spans[i].y == y && spans[i].x == right; ++i)
4698 right += spans[i].len;
4699 int length = right - x;
4700
4701 while (length) {
4702 int l = qMin(a: BufferSize, b: length);
4703 length -= l;
4704
4705 int process_length = l;
4706 int process_x = x;
4707
4708 const typename T::BlendType *src = handler.fetch(process_x, y, process_length);
4709 int offset = 0;
4710 while (l > 0) {
4711 if (x == spans->x) // new span?
4712 coverage = (spans->coverage * const_alpha) >> 8;
4713
4714 int right = spans->x + spans->len;
4715 int len = qMin(a: l, b: right - x);
4716
4717 handler.process(x, y, len, coverage, src, offset);
4718
4719 l -= len;
4720 x += len;
4721 offset += len;
4722
4723 if (x == right) { // done with current span?
4724 ++spans;
4725 --count;
4726 }
4727 }
4728 handler.store(process_x, y, process_length);
4729 }
4730 }
4731}
4732
4733template<typename T>
4734struct QBlendBase
4735{
4736 typedef T BlendType;
4737 QBlendBase(QSpanData *d, const Operator &o)
4738 : data(d)
4739 , op(o)
4740 , dest(nullptr)
4741 {
4742 }
4743
4744 QSpanData *data;
4745 Operator op;
4746
4747 BlendType *dest;
4748
4749 alignas(8) BlendType buffer[BufferSize];
4750 alignas(8) BlendType src_buffer[BufferSize];
4751};
4752
4753class BlendSrcGeneric : public QBlendBase<uint>
4754{
4755public:
4756 BlendSrcGeneric(QSpanData *d, const Operator &o)
4757 : QBlendBase<uint>(d, o)
4758 {
4759 }
4760
4761 const uint *fetch(int x, int y, int len)
4762 {
4763 dest = op.destFetch(buffer, data->rasterBuffer, x, y, len);
4764 return op.srcFetch(src_buffer, &op, data, y, x, len);
4765 }
4766
4767 void process(int, int, int len, int coverage, const uint *src, int offset)
4768 {
4769 op.func(dest + offset, src + offset, len, coverage);
4770 }
4771
4772 void store(int x, int y, int len)
4773 {
4774 if (op.destStore)
4775 op.destStore(data->rasterBuffer, x, y, dest, len);
4776 }
4777};
4778
4779#if QT_CONFIG(raster_64bit)
4780class BlendSrcGenericRGB64 : public QBlendBase<QRgba64>
4781{
4782public:
4783 BlendSrcGenericRGB64(QSpanData *d, const Operator &o)
4784 : QBlendBase<QRgba64>(d, o)
4785 {
4786 }
4787
4788 bool isSupported() const
4789 {
4790 return op.func64 && op.destFetch64;
4791 }
4792
4793 const QRgba64 *fetch(int x, int y, int len)
4794 {
4795 dest = op.destFetch64(buffer, data->rasterBuffer, x, y, len);
4796 return op.srcFetch64(src_buffer, &op, data, y, x, len);
4797 }
4798
4799 void process(int, int, int len, int coverage, const QRgba64 *src, int offset)
4800 {
4801 op.func64(dest + offset, src + offset, len, coverage);
4802 }
4803
4804 void store(int x, int y, int len)
4805 {
4806 if (op.destStore64)
4807 op.destStore64(data->rasterBuffer, x, y, dest, len);
4808 }
4809};
4810#endif
4811
4812static void blend_src_generic(int count, const QSpan *spans, void *userData)
4813{
4814 QSpanData *data = reinterpret_cast<QSpanData *>(userData);
4815 BlendSrcGeneric blend(data, getOperator(data, spans, spanCount: count));
4816 handleSpans(count, spans, data, handler&: blend);
4817}
4818
4819#if QT_CONFIG(raster_64bit)
4820static void blend_src_generic_rgb64(int count, const QSpan *spans, void *userData)
4821{
4822 QSpanData *data = reinterpret_cast<QSpanData *>(userData);
4823 Operator op = getOperator(data, spans, spanCount: count);
4824 BlendSrcGenericRGB64 blend64(data, op);
4825 if (blend64.isSupported())
4826 handleSpans(count, spans, data, handler&: blend64);
4827 else {
4828 qCDebug(lcQtGuiDrawHelper, "blend_src_generic_rgb64: unsupported 64-bit blend attempted, falling back to 32-bit");
4829 BlendSrcGeneric blend32(data, op);
4830 handleSpans(count, spans, data, handler&: blend32);
4831 }
4832}
4833#endif
4834
4835static void blend_untransformed_generic(int count, const QSpan *spans, void *userData)
4836{
4837 QSpanData *data = reinterpret_cast<QSpanData *>(userData);
4838
4839 uint buffer[BufferSize];
4840 uint src_buffer[BufferSize];
4841 Operator op = getOperator(data, spans, spanCount: count);
4842
4843 const int image_width = data->texture.width;
4844 const int image_height = data->texture.height;
4845 int xoff = -qRound(d: -data->dx);
4846 int yoff = -qRound(d: -data->dy);
4847
4848 for (; count--; spans++) {
4849 if (!spans->len)
4850 continue;
4851 int x = spans->x;
4852 int length = spans->len;
4853 int sx = xoff + x;
4854 int sy = yoff + spans->y;
4855 if (sy >= 0 && sy < image_height && sx < image_width) {
4856 if (sx < 0) {
4857 x -= sx;
4858 length += sx;
4859 sx = 0;
4860 }
4861 if (sx + length > image_width)
4862 length = image_width - sx;
4863 if (length > 0) {
4864 const int coverage = (spans->coverage * data->texture.const_alpha) >> 8;
4865 while (length) {
4866 int l = qMin(a: BufferSize, b: length);
4867 const uint *src = op.srcFetch(src_buffer, &op, data, sy, sx, l);
4868 uint *dest = op.destFetch(buffer, data->rasterBuffer, x, spans->y, l);
4869 op.func(dest, src, l, coverage);
4870 if (op.destStore)
4871 op.destStore(data->rasterBuffer, x, spans->y, dest, l);
4872 x += l;
4873 sx += l;
4874 length -= l;
4875 }
4876 }
4877 }
4878 }
4879}
4880
4881#if QT_CONFIG(raster_64bit)
4882static void blend_untransformed_generic_rgb64(int count, const QSpan *spans, void *userData)
4883{
4884 QSpanData *data = reinterpret_cast<QSpanData *>(userData);
4885
4886 Operator op = getOperator(data, spans, spanCount: count);
4887 if (!op.func64) {
4888 qCDebug(lcQtGuiDrawHelper, "blend_untransformed_generic_rgb64: unsupported 64-bit blend attempted, falling back to 32-bit");
4889 return blend_untransformed_generic(count, spans, userData);
4890 }
4891 alignas(8) QRgba64 buffer[BufferSize];
4892 alignas(8) QRgba64 src_buffer[BufferSize];
4893
4894 const int image_width = data->texture.width;
4895 const int image_height = data->texture.height;
4896 int xoff = -qRound(d: -data->dx);
4897 int yoff = -qRound(d: -data->dy);
4898
4899 for (; count--; spans++) {
4900 if (!spans->len)
4901 continue;
4902 int x = spans->x;
4903 int length = spans->len;
4904 int sx = xoff + x;
4905 int sy = yoff + spans->y;
4906 if (sy >= 0 && sy < image_height && sx < image_width) {
4907 if (sx < 0) {
4908 x -= sx;
4909 length += sx;
4910 sx = 0;
4911 }
4912 if (sx + length > image_width)
4913 length = image_width - sx;
4914 if (length > 0) {
4915 const int coverage = (spans->coverage * data->texture.const_alpha) >> 8;
4916 while (length) {
4917 int l = qMin(a: BufferSize, b: length);
4918 const QRgba64 *src = op.srcFetch64(src_buffer, &op, data, sy, sx, l);
4919 QRgba64 *dest = op.destFetch64(buffer, data->rasterBuffer, x, spans->y, l);
4920 op.func64(dest, src, l, coverage);
4921 if (op.destStore64)
4922 op.destStore64(data->rasterBuffer, x, spans->y, dest, l);
4923 x += l;
4924 sx += l;
4925 length -= l;
4926 }
4927 }
4928 }
4929 }
4930}
4931#endif
4932
4933static void blend_untransformed_argb(int count, const QSpan *spans, void *userData)
4934{
4935 QSpanData *data = reinterpret_cast<QSpanData *>(userData);
4936 if (data->texture.format != QImage::Format_ARGB32_Premultiplied
4937 && data->texture.format != QImage::Format_RGB32) {
4938 blend_untransformed_generic(count, spans, userData);
4939 return;
4940 }
4941
4942 Operator op = getOperator(data, spans, spanCount: count);
4943
4944 const int image_width = data->texture.width;
4945 const int image_height = data->texture.height;
4946 int xoff = -qRound(d: -data->dx);
4947 int yoff = -qRound(d: -data->dy);
4948
4949 for (; count--; spans++) {
4950 if (!spans->len)
4951 continue;
4952 int x = spans->x;
4953 int length = spans->len;
4954 int sx = xoff + x;
4955 int sy = yoff + spans->y;
4956 if (sy >= 0 && sy < image_height && sx < image_width) {
4957 if (sx < 0) {
4958 x -= sx;
4959 length += sx;
4960 sx = 0;
4961 }
4962 if (sx + length > image_width)
4963 length = image_width - sx;
4964 if (length > 0) {
4965 const int coverage = (spans->coverage * data->texture.const_alpha) >> 8;
4966 const uint *src = (const uint *)data->texture.scanLine(y: sy) + sx;
4967 uint *dest = ((uint *)data->rasterBuffer->scanLine(y: spans->y)) + x;
4968 op.func(dest, src, length, coverage);
4969 }
4970 }
4971 }
4972}
4973
4974static inline quint16 interpolate_pixel_rgb16_255(quint16 x, quint8 a,
4975 quint16 y, quint8 b)
4976{
4977 quint16 t = ((((x & 0x07e0) * a) + ((y & 0x07e0) * b)) >> 5) & 0x07e0;
4978 t |= ((((x & 0xf81f) * a) + ((y & 0xf81f) * b)) >> 5) & 0xf81f;
4979
4980 return t;
4981}
4982
4983static inline quint32 interpolate_pixel_rgb16x2_255(quint32 x, quint8 a,
4984 quint32 y, quint8 b)
4985{
4986 uint t;
4987 t = ((((x & 0xf81f07e0) >> 5) * a) + (((y & 0xf81f07e0) >> 5) * b)) & 0xf81f07e0;
4988 t |= ((((x & 0x07e0f81f) * a) + ((y & 0x07e0f81f) * b)) >> 5) & 0x07e0f81f;
4989 return t;
4990}
4991
4992static inline void blend_sourceOver_rgb16_rgb16(quint16 *Q_DECL_RESTRICT dest,
4993 const quint16 *Q_DECL_RESTRICT src,
4994 int length,
4995 const quint8 alpha,
4996 const quint8 ialpha)
4997{
4998 const int dstAlign = ((quintptr)dest) & 0x3;
4999 if (dstAlign) {
5000 *dest = interpolate_pixel_rgb16_255(x: *src, a: alpha, y: *dest, b: ialpha);
5001 ++dest;
5002 ++src;
5003 --length;
5004 }
5005 const int srcAlign = ((quintptr)src) & 0x3;
5006 int length32 = length >> 1;
5007 if (length32 && srcAlign == 0) {
5008 while (length32--) {
5009 const quint32 *src32 = reinterpret_cast<const quint32*>(src);
5010 quint32 *dest32 = reinterpret_cast<quint32*>(dest);
5011 *dest32 = interpolate_pixel_rgb16x2_255(x: *src32, a: alpha,
5012 y: *dest32, b: ialpha);
5013 dest += 2;
5014 src += 2;
5015 }
5016 length &= 0x1;
5017 }
5018 while (length--) {
5019 *dest = interpolate_pixel_rgb16_255(x: *src, a: alpha, y: *dest, b: ialpha);
5020 ++dest;
5021 ++src;
5022 }
5023}
5024
5025static void blend_untransformed_rgb565(int count, const QSpan *spans, void *userData)
5026{
5027 QSpanData *data = reinterpret_cast<QSpanData*>(userData);
5028 QPainter::CompositionMode mode = data->rasterBuffer->compositionMode;
5029
5030 if (data->texture.format != QImage::Format_RGB16
5031 || (mode != QPainter::CompositionMode_SourceOver
5032 && mode != QPainter::CompositionMode_Source))
5033 {
5034 blend_untransformed_generic(count, spans, userData);
5035 return;
5036 }
5037
5038 const int image_width = data->texture.width;
5039 const int image_height = data->texture.height;
5040 int xoff = -qRound(d: -data->dx);
5041 int yoff = -qRound(d: -data->dy);
5042
5043 const QSpan *end = spans + count;
5044 while (spans < end) {
5045 if (!spans->len) {
5046 ++spans;
5047 continue;
5048 }
5049 const quint8 coverage = (data->texture.const_alpha * spans->coverage) >> 8;
5050 if (coverage == 0) {
5051 ++spans;
5052 continue;
5053 }
5054
5055 int x = spans->x;
5056 int length = spans->len;
5057 int sx = xoff + x;
5058 int sy = yoff + spans->y;
5059 if (sy >= 0 && sy < image_height && sx < image_width) {
5060 if (sx < 0) {
5061 x -= sx;
5062 length += sx;
5063 sx = 0;
5064 }
5065 if (sx + length > image_width)
5066 length = image_width - sx;
5067 if (length > 0) {
5068 quint16 *dest = (quint16 *)data->rasterBuffer->scanLine(y: spans->y) + x;
5069 const quint16 *src = (const quint16 *)data->texture.scanLine(y: sy) + sx;
5070 if (coverage == 255) {
5071 memcpy(dest: dest, src: src, n: length * sizeof(quint16));
5072 } else {
5073 const quint8 alpha = (coverage + 1) >> 3;
5074 const quint8 ialpha = 0x20 - alpha;
5075 if (alpha > 0)
5076 blend_sourceOver_rgb16_rgb16(dest, src, length, alpha, ialpha);
5077 }
5078 }
5079 }
5080 ++spans;
5081 }
5082}
5083
5084static void blend_tiled_generic(int count, const QSpan *spans, void *userData)
5085{
5086 QSpanData *data = reinterpret_cast<QSpanData *>(userData);
5087
5088 uint buffer[BufferSize];
5089 uint src_buffer[BufferSize];
5090 Operator op = getOperator(data, spans, spanCount: count);
5091
5092 const int image_width = data->texture.width;
5093 const int image_height = data->texture.height;
5094 int xoff = -qRound(d: -data->dx) % image_width;
5095 int yoff = -qRound(d: -data->dy) % image_height;
5096
5097 if (xoff < 0)
5098 xoff += image_width;
5099 if (yoff < 0)
5100 yoff += image_height;
5101
5102 while (count--) {
5103 int x = spans->x;
5104 int length = spans->len;
5105 int sx = (xoff + spans->x) % image_width;
5106 int sy = (spans->y + yoff) % image_height;
5107 if (sx < 0)
5108 sx += image_width;
5109 if (sy < 0)
5110 sy += image_height;
5111
5112 const int coverage = (spans->coverage * data->texture.const_alpha) >> 8;
5113 while (length) {
5114 int l = qMin(a: image_width - sx, b: length);
5115 if (BufferSize < l)
5116 l = BufferSize;
5117 const uint *src = op.srcFetch(src_buffer, &op, data, sy, sx, l);
5118 uint *dest = op.destFetch(buffer, data->rasterBuffer, x, spans->y, l);
5119 op.func(dest, src, l, coverage);
5120 if (op.destStore)
5121 op.destStore(data->rasterBuffer, x, spans->y, dest, l);
5122 x += l;
5123 sx += l;
5124 length -= l;
5125 if (sx >= image_width)
5126 sx = 0;
5127 }
5128 ++spans;
5129 }
5130}
5131
5132#if QT_CONFIG(raster_64bit)
5133static void blend_tiled_generic_rgb64(int count, const QSpan *spans, void *userData)
5134{
5135 QSpanData *data = reinterpret_cast<QSpanData *>(userData);
5136
5137 Operator op = getOperator(data, spans, spanCount: count);
5138 if (!op.func64) {
5139 qCDebug(lcQtGuiDrawHelper, "blend_tiled_generic_rgb64: unsupported 64-bit blend attempted, falling back to 32-bit");
5140 return blend_tiled_generic(count, spans, userData);
5141 }
5142 alignas(8) QRgba64 buffer[BufferSize];
5143 alignas(8) QRgba64 src_buffer[BufferSize];
5144
5145 const int image_width = data->texture.width;
5146 const int image_height = data->texture.height;
5147 int xoff = -qRound(d: -data->dx) % image_width;
5148 int yoff = -qRound(d: -data->dy) % image_height;
5149
5150 if (xoff < 0)
5151 xoff += image_width;
5152 if (yoff < 0)
5153 yoff += image_height;
5154
5155 bool isBpp32 = qPixelLayouts[data->rasterBuffer->format].bpp == QPixelLayout::BPP32;
5156 bool isBpp64 = qPixelLayouts[data->rasterBuffer->format].bpp == QPixelLayout::BPP64;
5157 if (op.destFetch64 == destFetch64Undefined && image_width <= BufferSize && (isBpp32 || isBpp64)) {
5158 // If destination isn't blended into the result, we can do the tiling directly on destination pixels.
5159 while (count--) {
5160 int x = spans->x;
5161 int y = spans->y;
5162 int length = spans->len;
5163 int sx = (xoff + spans->x) % image_width;
5164 int sy = (spans->y + yoff) % image_height;
5165 if (sx < 0)
5166 sx += image_width;
5167 if (sy < 0)
5168 sy += image_height;
5169
5170 int sl = qMin(a: image_width, b: length);
5171 if (sx > 0 && sl > 0) {
5172 int l = qMin(a: image_width - sx, b: sl);
5173 const QRgba64 *src = op.srcFetch64(src_buffer, &op, data, sy, sx, l);
5174 op.destStore64(data->rasterBuffer, x, y, src, l);
5175 x += l;
5176 sx += l;
5177 sl -= l;
5178 if (sx >= image_width)
5179 sx = 0;
5180 }
5181 if (sl > 0) {
5182 Q_ASSERT(sx == 0);
5183 const QRgba64 *src = op.srcFetch64(src_buffer, &op, data, sy, sx, sl);
5184 op.destStore64(data->rasterBuffer, x, y, src, sl);
5185 x += sl;
5186 sx += sl;
5187 sl -= sl;
5188 if (sx >= image_width)
5189 sx = 0;
5190 }
5191 if (isBpp32) {
5192 uint *dest = reinterpret_cast<uint *>(data->rasterBuffer->scanLine(y)) + x - image_width;
5193 for (int i = image_width; i < length; ++i)
5194 dest[i] = dest[i - image_width];
5195 } else {
5196 quint64 *dest = reinterpret_cast<quint64 *>(data->rasterBuffer->scanLine(y)) + x - image_width;
5197 for (int i = image_width; i < length; ++i)
5198 dest[i] = dest[i - image_width];
5199 }
5200 ++spans;
5201 }
5202 return;
5203 }
5204
5205 while (count--) {
5206 int x = spans->x;
5207 int length = spans->len;
5208 int sx = (xoff + spans->x) % image_width;
5209 int sy = (spans->y + yoff) % image_height;
5210 if (sx < 0)
5211 sx += image_width;
5212 if (sy < 0)
5213 sy += image_height;
5214
5215 const int coverage = (spans->coverage * data->texture.const_alpha) >> 8;
5216 while (length) {
5217 int l = qMin(a: image_width - sx, b: length);
5218 if (BufferSize < l)
5219 l = BufferSize;
5220 const QRgba64 *src = op.srcFetch64(src_buffer, &op, data, sy, sx, l);
5221 QRgba64 *dest = op.destFetch64(buffer, data->rasterBuffer, x, spans->y, l);
5222 op.func64(dest, src, l, coverage);
5223 if (op.destStore64)
5224 op.destStore64(data->rasterBuffer, x, spans->y, dest, l);
5225 x += l;
5226 sx += l;
5227 length -= l;
5228 if (sx >= image_width)
5229 sx = 0;
5230 }
5231 ++spans;
5232 }
5233}
5234#endif
5235
5236static void blend_tiled_argb(int count, const QSpan *spans, void *userData)
5237{
5238 QSpanData *data = reinterpret_cast<QSpanData *>(userData);
5239 if (data->texture.format != QImage::Format_ARGB32_Premultiplied
5240 && data->texture.format != QImage::Format_RGB32) {
5241 blend_tiled_generic(count, spans, userData);
5242 return;
5243 }
5244
5245 Operator op = getOperator(data, spans, spanCount: count);
5246
5247 int image_width = data->texture.width;
5248 int image_height = data->texture.height;
5249 int xoff = -qRound(d: -data->dx) % image_width;
5250 int yoff = -qRound(d: -data->dy) % image_height;
5251
5252 if (xoff < 0)
5253 xoff += image_width;
5254 if (yoff < 0)
5255 yoff += image_height;
5256
5257 while (count--) {
5258 int x = spans->x;
5259 int length = spans->len;
5260 int sx = (xoff + spans->x) % image_width;
5261 int sy = (spans->y + yoff) % image_height;
5262 if (sx < 0)
5263 sx += image_width;
5264 if (sy < 0)
5265 sy += image_height;
5266
5267 const int coverage = (spans->coverage * data->texture.const_alpha) >> 8;
5268 while (length) {
5269 int l = qMin(a: image_width - sx, b: length);
5270 if (BufferSize < l)
5271 l = BufferSize;
5272 const uint *src = (const uint *)data->texture.scanLine(y: sy) + sx;
5273 uint *dest = ((uint *)data->rasterBuffer->scanLine(y: spans->y)) + x;
5274 op.func(dest, src, l, coverage);
5275 x += l;
5276 sx += l;
5277 length -= l;
5278 if (sx >= image_width)
5279 sx = 0;
5280 }
5281 ++spans;
5282 }
5283}
5284
5285static void blend_tiled_rgb565(int count, const QSpan *spans, void *userData)
5286{
5287 QSpanData *data = reinterpret_cast<QSpanData*>(userData);
5288 QPainter::CompositionMode mode = data->rasterBuffer->compositionMode;
5289
5290 if (data->texture.format != QImage::Format_RGB16
5291 || (mode != QPainter::CompositionMode_SourceOver
5292 && mode != QPainter::CompositionMode_Source))
5293 {
5294 blend_tiled_generic(count, spans, userData);
5295 return;
5296 }
5297
5298 const int image_width = data->texture.width;
5299 const int image_height = data->texture.height;
5300 int xoff = -qRound(d: -data->dx) % image_width;
5301 int yoff = -qRound(d: -data->dy) % image_height;
5302
5303 if (xoff < 0)
5304 xoff += image_width;
5305 if (yoff < 0)
5306 yoff += image_height;
5307
5308 while (count--) {
5309 const quint8 coverage = (data->texture.const_alpha * spans->coverage) >> 8;
5310 if (coverage == 0) {
5311 ++spans;
5312 continue;
5313 }
5314
5315 int x = spans->x;
5316 int length = spans->len;
5317 int sx = (xoff + spans->x) % image_width;
5318 int sy = (spans->y + yoff) % image_height;
5319 if (sx < 0)
5320 sx += image_width;
5321 if (sy < 0)
5322 sy += image_height;
5323
5324 if (coverage == 255) {
5325 // Copy the first texture block
5326 length = qMin(a: image_width,b: length);
5327 int tx = x;
5328 while (length) {
5329 int l = qMin(a: image_width - sx, b: length);
5330 if (BufferSize < l)
5331 l = BufferSize;
5332 quint16 *dest = ((quint16 *)data->rasterBuffer->scanLine(y: spans->y)) + tx;
5333 const quint16 *src = (const quint16 *)data->texture.scanLine(y: sy) + sx;
5334 memcpy(dest: dest, src: src, n: l * sizeof(quint16));
5335 length -= l;
5336 tx += l;
5337 sx += l;
5338 if (sx >= image_width)
5339 sx = 0;
5340 }
5341
5342 // Now use the rasterBuffer as the source of the texture,
5343 // We can now progressively copy larger blocks
5344 // - Less cpu time in code figuring out what to copy
5345 // We are dealing with one block of data
5346 // - More likely to fit in the cache
5347 // - can use memcpy
5348 int copy_image_width = qMin(a: image_width, b: int(spans->len));
5349 length = spans->len - copy_image_width;
5350 quint16 *src = ((quint16 *)data->rasterBuffer->scanLine(y: spans->y)) + x;
5351 quint16 *dest = src + copy_image_width;
5352 while (copy_image_width < length) {
5353 memcpy(dest: dest, src: src, n: copy_image_width * sizeof(quint16));
5354 dest += copy_image_width;
5355 length -= copy_image_width;
5356 copy_image_width *= 2;
5357 }
5358 if (length > 0)
5359 memcpy(dest: dest, src: src, n: length * sizeof(quint16));
5360 } else {
5361 const quint8 alpha = (coverage + 1) >> 3;
5362 const quint8 ialpha = 0x20 - alpha;
5363 if (alpha > 0) {
5364 while (length) {
5365 int l = qMin(a: image_width - sx, b: length);
5366 if (BufferSize < l)
5367 l = BufferSize;
5368 quint16 *dest = ((quint16 *)data->rasterBuffer->scanLine(y: spans->y)) + x;
5369 const quint16 *src = (const quint16 *)data->texture.scanLine(y: sy) + sx;
5370 blend_sourceOver_rgb16_rgb16(dest, src, length: l, alpha, ialpha);
5371 x += l;
5372 sx += l;
5373 length -= l;
5374 if (sx >= image_width)
5375 sx = 0;
5376 }
5377 }
5378 }
5379 ++spans;
5380 }
5381}
5382
5383/* Image formats here are target formats */
5384static const ProcessSpans processTextureSpansARGB32PM[NBlendTypes] = {
5385 blend_untransformed_argb, // Untransformed
5386 blend_tiled_argb, // Tiled
5387 blend_src_generic, // Transformed
5388 blend_src_generic, // TransformedTiled
5389 blend_src_generic, // TransformedBilinear
5390 blend_src_generic // TransformedBilinearTiled
5391};
5392
5393static const ProcessSpans processTextureSpansRGB16[NBlendTypes] = {
5394 blend_untransformed_rgb565, // Untransformed
5395 blend_tiled_rgb565, // Tiled
5396 blend_src_generic, // Transformed
5397 blend_src_generic, // TransformedTiled
5398 blend_src_generic, // TransformedBilinear
5399 blend_src_generic // TransformedBilinearTiled
5400};
5401
5402static const ProcessSpans processTextureSpansGeneric[NBlendTypes] = {
5403 blend_untransformed_generic, // Untransformed
5404 blend_tiled_generic, // Tiled
5405 blend_src_generic, // Transformed
5406 blend_src_generic, // TransformedTiled
5407 blend_src_generic, // TransformedBilinear
5408 blend_src_generic // TransformedBilinearTiled
5409};
5410
5411#if QT_CONFIG(raster_64bit)
5412static const ProcessSpans processTextureSpansGeneric64[NBlendTypes] = {
5413 blend_untransformed_generic_rgb64, // Untransformed
5414 blend_tiled_generic_rgb64, // Tiled
5415 blend_src_generic_rgb64, // Transformed
5416 blend_src_generic_rgb64, // TransformedTiled
5417 blend_src_generic_rgb64, // TransformedBilinear
5418 blend_src_generic_rgb64 // TransformedBilinearTiled
5419};
5420#endif
5421
5422void qBlendTexture(int count, const QSpan *spans, void *userData)
5423{
5424 QSpanData *data = reinterpret_cast<QSpanData *>(userData);
5425 TextureBlendType blendType = getBlendType(data);
5426 ProcessSpans proc;
5427 switch (data->rasterBuffer->format) {
5428 case QImage::Format_ARGB32_Premultiplied:
5429 proc = processTextureSpansARGB32PM[blendType];
5430 break;
5431 case QImage::Format_RGB16:
5432 proc = processTextureSpansRGB16[blendType];
5433 break;
5434#if QT_CONFIG(raster_64bit)
5435#if defined(__SSE2__) || defined(__ARM_NEON__) || (Q_PROCESSOR_WORDSIZE == 8)
5436 case QImage::Format_ARGB32:
5437 case QImage::Format_RGBA8888:
5438#endif
5439 case QImage::Format_BGR30:
5440 case QImage::Format_A2BGR30_Premultiplied:
5441 case QImage::Format_RGB30:
5442 case QImage::Format_A2RGB30_Premultiplied:
5443 case QImage::Format_RGBX64:
5444 case QImage::Format_RGBA64:
5445 case QImage::Format_RGBA64_Premultiplied:
5446 case QImage::Format_Grayscale16:
5447 proc = processTextureSpansGeneric64[blendType];
5448 break;
5449#endif // QT_CONFIG(raster_64bit)
5450 case QImage::Format_Invalid:
5451 Q_UNREACHABLE();
5452 return;
5453 default:
5454 proc = processTextureSpansGeneric[blendType];
5455 break;
5456 }
5457 proc(count, spans, userData);
5458}
5459
5460static void blend_vertical_gradient_argb(int count, const QSpan *spans, void *userData)
5461{
5462 QSpanData *data = reinterpret_cast<QSpanData *>(userData);
5463
5464 LinearGradientValues linear;
5465 getLinearGradientValues(v: &linear, data);
5466
5467 CompositionFunctionSolid funcSolid =
5468 functionForModeSolid[data->rasterBuffer->compositionMode];
5469
5470 /*
5471 The logic for vertical gradient calculations is a mathematically
5472 reduced copy of that in fetchLinearGradient() - which is basically:
5473
5474 qreal ry = data->m22 * (y + 0.5) + data->dy;
5475 qreal t = linear.dy*ry + linear.off;
5476 t *= (GRADIENT_STOPTABLE_SIZE - 1);
5477 quint32 color =
5478 qt_gradient_pixel_fixed(&data->gradient,
5479 int(t * FIXPT_SIZE));
5480
5481 This has then been converted to fixed point to improve performance.
5482 */
5483 const int gss = GRADIENT_STOPTABLE_SIZE - 1;
5484 int yinc = int((linear.dy * data->m22 * gss) * FIXPT_SIZE);
5485 int off = int((((linear.dy * (data->m22 * qreal(0.5) + data->dy) + linear.off) * gss) * FIXPT_SIZE));
5486
5487 while (count--) {
5488 int y = spans->y;
5489 int x = spans->x;
5490
5491 quint32 *dst = (quint32 *)(data->rasterBuffer->scanLine(y)) + x;
5492 quint32 color =
5493 qt_gradient_pixel_fixed(data: &data->gradient, fixed_pos: yinc * y + off);
5494
5495 funcSolid(dst, spans->len, color, spans->coverage);
5496 ++spans;
5497 }
5498}
5499
5500template<ProcessSpans blend_color>
5501static void blend_vertical_gradient(int count, const QSpan *spans, void *userData)
5502{
5503 QSpanData *data = reinterpret_cast<QSpanData *>(userData);
5504
5505 LinearGradientValues linear;
5506 getLinearGradientValues(v: &linear, data);
5507
5508 // Based on the same logic as blend_vertical_gradient_argb.
5509
5510 const int gss = GRADIENT_STOPTABLE_SIZE - 1;
5511 int yinc = int((linear.dy * data->m22 * gss) * FIXPT_SIZE);
5512 int off = int((((linear.dy * (data->m22 * qreal(0.5) + data->dy) + linear.off) * gss) * FIXPT_SIZE));
5513
5514 while (count--) {
5515 int y = spans->y;
5516
5517#if QT_CONFIG(raster_64bit)
5518 data->solidColor = qt_gradient_pixel64_fixed(data: &data->gradient, fixed_pos: yinc * y + off);
5519#else
5520 data->solidColor = QRgba64::fromArgb32(qt_gradient_pixel_fixed(&data->gradient, yinc * y + off));
5521#endif
5522 blend_color(1, spans, userData);
5523 ++spans;
5524 }
5525}
5526
5527void qBlendGradient(int count, const QSpan *spans, void *userData)
5528{
5529 QSpanData *data = reinterpret_cast<QSpanData *>(userData);
5530 bool isVerticalGradient =
5531 data->txop <= QTransform::TxScale &&
5532 data->type == QSpanData::LinearGradient &&
5533 data->gradient.linear.end.x == data->gradient.linear.origin.x;
5534 switch (data->rasterBuffer->format) {
5535 case QImage::Format_RGB16:
5536 if (isVerticalGradient)
5537 return blend_vertical_gradient<blend_color_rgb16>(count, spans, userData);
5538 return blend_src_generic(count, spans, userData);
5539 case QImage::Format_RGB32:
5540 case QImage::Format_ARGB32_Premultiplied:
5541 if (isVerticalGradient)
5542 return blend_vertical_gradient_argb(count, spans, userData);
5543 return blend_src_generic(count, spans, userData);
5544#if QT_CONFIG(raster_64bit)
5545#if defined(__SSE2__) || defined(__ARM_NEON__) || (Q_PROCESSOR_WORDSIZE == 8)
5546 case QImage::Format_ARGB32:
5547 case QImage::Format_RGBA8888:
5548#endif
5549 case QImage::Format_BGR30:
5550 case QImage::Format_A2BGR30_Premultiplied:
5551 case QImage::Format_RGB30:
5552 case QImage::Format_A2RGB30_Premultiplied:
5553 case QImage::Format_RGBX64:
5554 case QImage::Format_RGBA64:
5555 case QImage::Format_RGBA64_Premultiplied:
5556 if (isVerticalGradient)
5557 return blend_vertical_gradient<blend_color_generic_rgb64>(count, spans, userData);
5558 return blend_src_generic_rgb64(count, spans, userData);
5559#endif // QT_CONFIG(raster_64bit)
5560 case QImage::Format_Invalid:
5561 break;
5562 default:
5563 if (isVerticalGradient)
5564 return blend_vertical_gradient<blend_color_generic>(count, spans, userData);
5565 return blend_src_generic(count, spans, userData);
5566 }
5567 Q_UNREACHABLE();
5568}
5569
5570template <class DST> static
5571inline void qt_bitmapblit_template(QRasterBuffer *rasterBuffer,
5572 int x, int y, DST color,
5573 const uchar *map,
5574 int mapWidth, int mapHeight, int mapStride)
5575{
5576 DST *dest = reinterpret_cast<DST *>(rasterBuffer->scanLine(y)) + x;
5577 const int destStride = rasterBuffer->stride<DST>();
5578
5579 if (mapWidth > 8) {
5580 while (mapHeight--) {
5581 int x0 = 0;
5582 int n = 0;
5583 for (int x = 0; x < mapWidth; x += 8) {
5584 uchar s = map[x >> 3];
5585 for (int i = 0; i < 8; ++i) {
5586 if (s & 0x80) {
5587 ++n;
5588 } else {
5589 if (n) {
5590 qt_memfill(dest + x0, color, n);
5591 x0 += n + 1;
5592 n = 0;
5593 } else {
5594 ++x0;
5595 }
5596 if (!s) {
5597 x0 += 8 - 1 - i;
5598 break;
5599 }
5600 }
5601 s <<= 1;
5602 }
5603 }
5604 if (n)
5605 qt_memfill(dest + x0, color, n);
5606 dest += destStride;
5607 map += mapStride;
5608 }
5609 } else {
5610 while (mapHeight--) {
5611 int x0 = 0;
5612 int n = 0;
5613 for (uchar s = *map; s; s <<= 1) {
5614 if (s & 0x80) {
5615 ++n;
5616 } else if (n) {
5617 qt_memfill(dest + x0, color, n);
5618 x0 += n + 1;
5619 n = 0;
5620 } else {
5621 ++x0;
5622 }
5623 }
5624 if (n)
5625 qt_memfill(dest + x0, color, n);
5626 dest += destStride;
5627 map += mapStride;
5628 }
5629 }
5630}
5631
5632inline static void qt_bitmapblit_argb32(QRasterBuffer *rasterBuffer,
5633 int x, int y, const QRgba64 &color,
5634 const uchar *map,
5635 int mapWidth, int mapHeight, int mapStride)
5636{
5637 qt_bitmapblit_template<quint32>(rasterBuffer, x, y, color: color.toArgb32(),
5638 map, mapWidth, mapHeight, mapStride);
5639}
5640
5641inline static void qt_bitmapblit_rgba8888(QRasterBuffer *rasterBuffer,
5642 int x, int y, const QRgba64 &color,
5643 const uchar *map,
5644 int mapWidth, int mapHeight, int mapStride)
5645{
5646 qt_bitmapblit_template<quint32>(rasterBuffer, x, y, color: ARGB2RGBA(x: color.toArgb32()),
5647 map, mapWidth, mapHeight, mapStride);
5648}
5649
5650template<QtPixelOrder PixelOrder>
5651inline static void qt_bitmapblit_rgb30(QRasterBuffer *rasterBuffer,
5652 int x, int y, const QRgba64 &color,
5653 const uchar *map,
5654 int mapWidth, int mapHeight, int mapStride)
5655{
5656 qt_bitmapblit_template<quint32>(rasterBuffer, x, y, qConvertRgb64ToRgb30<PixelOrder>(color),
5657 map, mapWidth, mapHeight, mapStride);
5658}
5659
5660inline static void qt_bitmapblit_quint16(QRasterBuffer *rasterBuffer,
5661 int x, int y, const QRgba64 &color,
5662 const uchar *map,
5663 int mapWidth, int mapHeight, int mapStride)
5664{
5665 qt_bitmapblit_template<quint16>(rasterBuffer, x, y, color: color.toRgb16(),
5666 map, mapWidth, mapHeight, mapStride);
5667}
5668
5669static inline void grayBlendPixel(quint32 *dst, int coverage, QRgba64 srcLinear, const QColorTrcLut *colorProfile)
5670{
5671 // Do a gammacorrected gray alphablend...
5672 const QRgba64 dstLinear = colorProfile ? colorProfile->toLinear64(rgb32: *dst) : QRgba64::fromArgb32(rgb: *dst);
5673
5674 QRgba64 blend = interpolate255(x: srcLinear, alpha1: coverage, y: dstLinear, alpha2: 255 - coverage);
5675
5676 *dst = colorProfile ? colorProfile->fromLinear64(rgb64: blend) : toArgb32(rgba64: blend);
5677}
5678
5679static inline void alphamapblend_argb32(quint32 *dst, int coverage, QRgba64 srcLinear, quint32 src, const QColorTrcLut *colorProfile)
5680{
5681 if (coverage == 0) {
5682 // nothing
5683 } else if (coverage == 255 || !colorProfile) {
5684 blend_pixel(dst&: *dst, src, const_alpha: coverage);
5685 } else if (*dst < 0xff000000) {
5686 // Give up and do a naive gray alphablend. Needed to deal with ARGB32 and invalid ARGB32_premultiplied, see QTBUG-60571
5687 blend_pixel(dst&: *dst, src, const_alpha: coverage);
5688 } else if (src >= 0xff000000) {
5689 grayBlendPixel(dst, coverage, srcLinear, colorProfile);
5690 } else {
5691 // First do naive blend with text-color
5692 QRgb s = *dst;
5693 blend_pixel(dst&: s, src);
5694 // Then gamma-corrected blend with glyph shape
5695 QRgba64 s64 = colorProfile ? colorProfile->toLinear64(rgb32: s) : QRgba64::fromArgb32(rgb: s);
5696 grayBlendPixel(dst, coverage, srcLinear: s64, colorProfile);
5697 }
5698}
5699
5700#if QT_CONFIG(raster_64bit)
5701
5702static inline void grayBlendPixel(QRgba64 &dst, int coverage, QRgba64 srcLinear, const QColorTrcLut *colorProfile)
5703{
5704 // Do a gammacorrected gray alphablend...
5705 QRgba64 dstColor = dst;
5706 if (colorProfile) {
5707 if (dstColor.isOpaque())
5708 dstColor = colorProfile->toLinear(rgb64: dstColor);
5709 else if (!dstColor.isTransparent())
5710 dstColor = colorProfile->toLinear(rgb64: dstColor.unpremultiplied()).premultiplied();
5711 }
5712
5713 blend_pixel(dst&: dstColor, src: srcLinear, const_alpha: coverage);
5714
5715 if (colorProfile) {
5716 if (dstColor.isOpaque())
5717 dstColor = colorProfile->fromLinear(rgb64: dstColor);
5718 else if (!dstColor.isTransparent())
5719 dstColor = colorProfile->fromLinear(rgb64: dstColor.unpremultiplied()).premultiplied();
5720 }
5721 dst = dstColor;
5722}
5723
5724static inline void alphamapblend_generic(int coverage, QRgba64 *dest, int x, const QRgba64 &srcLinear, const QRgba64 &src, const QColorTrcLut *colorProfile)
5725{
5726 if (coverage == 0) {
5727 // nothing
5728 } else if (coverage == 255) {
5729 blend_pixel(dst&: dest[x], src);
5730 } else if (src.isOpaque()) {
5731 grayBlendPixel(dst&: dest[x], coverage, srcLinear, colorProfile);
5732 } else {
5733 // First do naive blend with text-color
5734 QRgba64 s = dest[x];
5735 blend_pixel(dst&: s, src);
5736 // Then gamma-corrected blend with glyph shape
5737 if (colorProfile)
5738 s = colorProfile->toLinear(rgb64: s);
5739 grayBlendPixel(dst&: dest[x], coverage, srcLinear: s, colorProfile);
5740 }
5741}
5742
5743static void qt_alphamapblit_generic(QRasterBuffer *rasterBuffer,
5744 int x, int y, const QRgba64 &color,
5745 const uchar *map,
5746 int mapWidth, int mapHeight, int mapStride,
5747 const QClipData *clip, bool useGammaCorrection)
5748{
5749 if (color.isTransparent())
5750 return;
5751
5752 const QColorTrcLut *colorProfile = nullptr;
5753
5754 if (useGammaCorrection)
5755 colorProfile = QGuiApplicationPrivate::instance()->colorProfileForA8Text();
5756
5757 QRgba64 srcColor = color;
5758 if (colorProfile && color.isOpaque())
5759 srcColor = colorProfile->toLinear(rgb64: srcColor);
5760
5761 alignas(8) QRgba64 buffer[BufferSize];
5762 const DestFetchProc64 destFetch64 = destFetchProc64[rasterBuffer->format];
5763 const DestStoreProc64 destStore64 = destStoreProc64[rasterBuffer->format];
5764
5765 if (!clip) {
5766 for (int ly = 0; ly < mapHeight; ++ly) {
5767 int i = x;
5768 int length = mapWidth;
5769 while (length > 0) {
5770 int l = qMin(a: BufferSize, b: length);
5771 QRgba64 *dest = destFetch64(buffer, rasterBuffer, i, y + ly, l);
5772 for (int j=0; j < l; ++j) {
5773 const int coverage = map[j + (i - x)];
5774 alphamapblend_generic(coverage, dest, x: j, srcLinear: srcColor, src: color, colorProfile);
5775 }
5776 if (destStore64)
5777 destStore64(rasterBuffer, i, y + ly, dest, l);
5778 length -= l;
5779 i += l;
5780 }
5781 map += mapStride;
5782 }
5783 } else {
5784 int bottom = qMin(a: y + mapHeight, b: rasterBuffer->height());
5785
5786 int top = qMax(a: y, b: 0);
5787 map += (top - y) * mapStride;
5788
5789 const_cast<QClipData *>(clip)->initialize();
5790 for (int yp = top; yp<bottom; ++yp) {
5791 const QClipData::ClipLine &line = clip->m_clipLines[yp];
5792
5793 for (int i=0; i<line.count; ++i) {
5794 const QSpan &clip = line.spans[i];
5795
5796 int start = qMax<int>(a: x, b: clip.x);
5797 int end = qMin<int>(a: x + mapWidth, b: clip.x + clip.len);
5798 if (end <= start)
5799 continue;
5800 Q_ASSERT(end - start <= BufferSize);
5801 QRgba64 *dest = destFetch64(buffer, rasterBuffer, start, clip.y, end - start);
5802
5803 for (int xp=start; xp<end; ++xp) {
5804 const int coverage = map[xp - x];
5805 alphamapblend_generic(coverage, dest, x: xp - start, srcLinear: srcColor, src: color, colorProfile);
5806 }
5807 if (destStore64)
5808 destStore64(rasterBuffer, start, clip.y, dest, end - start);
5809 } // for (i -> line.count)
5810 map += mapStride;
5811 } // for (yp -> bottom)
5812 }
5813}
5814#else
5815static void qt_alphamapblit_generic(QRasterBuffer *rasterBuffer,
5816 int x, int y, const QRgba64 &color,
5817 const uchar *map,
5818 int mapWidth, int mapHeight, int mapStride,
5819 const QClipData *clip, bool useGammaCorrection)
5820{
5821 if (color.isTransparent())
5822 return;
5823
5824 const quint32 c = color.toArgb32();
5825
5826 const QColorTrcLut *colorProfile = nullptr;
5827
5828 if (useGammaCorrection)
5829 colorProfile = QGuiApplicationPrivate::instance()->colorProfileForA8Text();
5830
5831 QRgba64 srcColor = color;
5832 if (colorProfile && color.isOpaque())
5833 srcColor = colorProfile->toLinear(srcColor);
5834
5835 quint32 buffer[BufferSize];
5836 const DestFetchProc destFetch = destFetchProc[rasterBuffer->format];
5837 const DestStoreProc destStore = destStoreProc[rasterBuffer->format];
5838
5839 if (!clip) {
5840 for (int ly = 0; ly < mapHeight; ++ly) {
5841 int i = x;
5842 int length = mapWidth;
5843 while (length > 0) {
5844 int l = qMin(BufferSize, length);
5845 quint32 *dest = destFetch(buffer, rasterBuffer, i, y + ly, l);
5846 for (int j=0; j < l; ++j) {
5847 const int coverage = map[j + (i - x)];
5848 alphamapblend_argb32(dest + j, coverage, srcColor, c, colorProfile);
5849 }
5850 if (destStore)
5851 destStore(rasterBuffer, i, y + ly, dest, l);
5852 length -= l;
5853 i += l;
5854 }
5855 map += mapStride;
5856 }
5857 } else {
5858 int bottom = qMin(y + mapHeight, rasterBuffer->height());
5859
5860 int top = qMax(y, 0);
5861 map += (top - y) * mapStride;
5862
5863 const_cast<QClipData *>(clip)->initialize();
5864 for (int yp = top; yp<bottom; ++yp) {
5865 const QClipData::ClipLine &line = clip->m_clipLines[yp];
5866
5867 for (int i=0; i<line.count; ++i) {
5868 const QSpan &clip = line.spans[i];
5869
5870 int start = qMax<int>(x, clip.x);
5871 int end = qMin<int>(x + mapWidth, clip.x + clip.len);
5872 if (end <= start)
5873 continue;
5874 Q_ASSERT(end - start <= BufferSize);
5875 quint32 *dest = destFetch(buffer, rasterBuffer, start, clip.y, end - start);
5876
5877 for (int xp=start; xp<end; ++xp) {
5878 const int coverage = map[xp - x];
5879 alphamapblend_argb32(dest + xp - x, coverage, srcColor, color, colorProfile);
5880 }
5881 if (destStore)
5882 destStore(rasterBuffer, start, clip.y, dest, end - start);
5883 } // for (i -> line.count)
5884 map += mapStride;
5885 } // for (yp -> bottom)
5886 }
5887}
5888#endif
5889
5890static inline void alphamapblend_quint16(int coverage, quint16 *dest, int x, const quint16 srcColor)
5891{
5892 if (coverage == 0) {
5893 // nothing
5894 } else if (coverage == 255) {
5895 dest[x] = srcColor;
5896 } else {
5897 dest[x] = BYTE_MUL_RGB16(x: srcColor, a: coverage)
5898 + BYTE_MUL_RGB16(x: dest[x], a: 255 - coverage);
5899 }
5900}
5901
5902void qt_alphamapblit_quint16(QRasterBuffer *rasterBuffer,
5903 int x, int y, const QRgba64 &color,
5904 const uchar *map,
5905 int mapWidth, int mapHeight, int mapStride,
5906 const QClipData *clip, bool useGammaCorrection)
5907{
5908 if (useGammaCorrection || !color.isOpaque()) {
5909 qt_alphamapblit_generic(rasterBuffer, x, y, color, map, mapWidth, mapHeight, mapStride, clip, useGammaCorrection);
5910 return;
5911 }
5912
5913 const quint16 c = color.toRgb16();
5914
5915 if (!clip) {
5916 quint16 *dest = reinterpret_cast<quint16*>(rasterBuffer->scanLine(y)) + x;
5917 const int destStride = rasterBuffer->stride<quint16>();
5918 while (mapHeight--) {
5919 for (int i = 0; i < mapWidth; ++i)
5920 alphamapblend_quint16(coverage: map[i], dest, x: i, srcColor: c);
5921 dest += destStride;
5922 map += mapStride;
5923 }
5924 } else {
5925 int top = qMax(a: y, b: 0);
5926 int bottom = qMin(a: y + mapHeight, b: rasterBuffer->height());
5927 map += (top - y) * mapStride;
5928
5929 const_cast<QClipData *>(clip)->initialize();
5930 for (int yp = top; yp<bottom; ++yp) {
5931 const QClipData::ClipLine &line = clip->m_clipLines[yp];
5932
5933 quint16 *dest = reinterpret_cast<quint16*>(rasterBuffer->scanLine(y: yp));
5934
5935 for (int i=0; i<line.count; ++i) {
5936 const QSpan &clip = line.spans[i];
5937
5938 int start = qMax<int>(a: x, b: clip.x);
5939 int end = qMin<int>(a: x + mapWidth, b: clip.x + clip.len);
5940
5941 for (int xp=start; xp<end; ++xp)
5942 alphamapblend_quint16(coverage: map[xp - x], dest, x: xp, srcColor: c);
5943 } // for (i -> line.count)
5944 map += mapStride;
5945 } // for (yp -> bottom)
5946 }
5947}
5948
5949static void qt_alphamapblit_argb32(QRasterBuffer *rasterBuffer,
5950 int x, int y, const QRgba64 &color,
5951 const uchar *map,
5952 int mapWidth, int mapHeight, int mapStride,
5953 const QClipData *clip, bool useGammaCorrection)
5954{
5955 const quint32 c = color.toArgb32();
5956 const int destStride = rasterBuffer->stride<quint32>();
5957
5958 if (color.isTransparent())
5959 return;
5960
5961 const QColorTrcLut *colorProfile = nullptr;
5962
5963 if (useGammaCorrection)
5964 colorProfile = QGuiApplicationPrivate::instance()->colorProfileForA8Text();
5965
5966 QRgba64 srcColor = color;
5967 if (colorProfile && color.isOpaque())
5968 srcColor = colorProfile->toLinear(rgb64: srcColor);
5969
5970 if (!clip) {
5971 quint32 *dest = reinterpret_cast<quint32*>(rasterBuffer->scanLine(y)) + x;
5972 while (mapHeight--) {
5973 for (int i = 0; i < mapWidth; ++i) {
5974 const int coverage = map[i];
5975 alphamapblend_argb32(dst: dest + i, coverage, srcLinear: srcColor, src: c, colorProfile);
5976 }
5977 dest += destStride;
5978 map += mapStride;
5979 }
5980 } else {
5981 int bottom = qMin(a: y + mapHeight, b: rasterBuffer->height());
5982
5983 int top = qMax(a: y, b: 0);
5984 map += (top - y) * mapStride;
5985
5986 const_cast<QClipData *>(clip)->initialize();
5987 for (int yp = top; yp<bottom; ++yp) {
5988 const QClipData::ClipLine &line = clip->m_clipLines[yp];
5989
5990 quint32 *dest = reinterpret_cast<quint32 *>(rasterBuffer->scanLine(y: yp));
5991
5992 for (int i=0; i<line.count; ++i) {
5993 const QSpan &clip = line.spans[i];
5994
5995 int start = qMax<int>(a: x, b: clip.x);
5996 int end = qMin<int>(a: x + mapWidth, b: clip.x + clip.len);
5997
5998 for (int xp=start; xp<end; ++xp) {
5999 const int coverage = map[xp - x];
6000 alphamapblend_argb32(dst: dest + xp, coverage, srcLinear: srcColor, src: c, colorProfile);
6001 } // for (i -> line.count)
6002 } // for (yp -> bottom)
6003 map += mapStride;
6004 }
6005 }
6006}
6007
6008static inline int qRgbAvg(QRgb rgb)
6009{
6010 return (qRed(rgb) * 5 + qGreen(rgb) * 6 + qBlue(rgb) * 5) / 16;
6011}
6012
6013static inline void rgbBlendPixel(quint32 *dst, int coverage, QRgba64 slinear, const QColorTrcLut *colorProfile)
6014{
6015 // Do a gammacorrected RGB alphablend...
6016 const QRgba64 dlinear = colorProfile ? colorProfile->toLinear64(rgb32: *dst) : QRgba64::fromArgb32(rgb: *dst);
6017
6018 QRgba64 blend = rgbBlend(d: dlinear, s: slinear, rgbAlpha: coverage);
6019
6020 *dst = colorProfile ? colorProfile->fromLinear64(rgb64: blend) : toArgb32(rgba64: blend);
6021}
6022
// Blends s into d with a separate 8-bit weight per channel taken from the
// corresponding byte of rgbAlpha: result = (s * a + d * (255 - a)) / 255.
//
// Note that the two implementations treat the top byte differently: the SSE2
// path runs all four bytes through the same arithmetic, while the scalar path
// only blends red, green and blue and writes 0xff as the top byte.
static inline QRgb rgbBlend(QRgb d, QRgb s, uint rgbAlpha)
{
#if defined(__SSE2__)
    // Move the three 32-bit values into the low lanes of SSE registers.
    __m128i vd = _mm_cvtsi32_si128(a: d);
    __m128i vs = _mm_cvtsi32_si128(a: s);
    __m128i va = _mm_cvtsi32_si128(a: rgbAlpha);
    const __m128i vz = _mm_setzero_si128();
    // Widen every byte to a 16-bit lane so the products below fit.
    vd = _mm_unpacklo_epi8(a: vd, b: vz);
    vs = _mm_unpacklo_epi8(a: vs, b: vz);
    va = _mm_unpacklo_epi8(a: va, b: vz);
    // For lane values in 0..255, 255 ^ a equals 255 - a.
    __m128i vb = _mm_xor_si128(a: _mm_set1_epi16(w: 255), b: va);
    vs = _mm_mullo_epi16(a: vs, b: va);
    vd = _mm_mullo_epi16(a: vd, b: vb);
    vd = _mm_add_epi16(a: vd, b: vs);
    // Division by 255 done as (v + (v >> 8) + 0x80) >> 8.
    vd = _mm_add_epi16(a: vd, b: _mm_srli_epi16(a: vd, count: 8));
    vd = _mm_add_epi16(a: vd, b: _mm_set1_epi16(w: 0x80));
    vd = _mm_srli_epi16(a: vd, count: 8);
    // Narrow the 16-bit lanes back to bytes and return the low 32 bits.
    vd = _mm_packus_epi16(a: vd, b: vd);
    return _mm_cvtsi128_si32(a: vd);
#else
    // Destination channels.
    const int dr = qRed(d);
    const int dg = qGreen(d);
    const int db = qBlue(d);

    // Source channels.
    const int sr = qRed(s);
    const int sg = qGreen(s);
    const int sb = qBlue(s);

    // Per-channel blend weights.
    const int mr = qRed(rgbAlpha);
    const int mg = qGreen(rgbAlpha);
    const int mb = qBlue(rgbAlpha);

    const int nr = qt_div_255(sr * mr + dr * (255 - mr));
    const int ng = qt_div_255(sg * mg + dg * (255 - mg));
    const int nb = qt_div_255(sb * mb + db * (255 - mb));

    // The result's top byte is always 0xff on this path.
    return 0xff000000 | (nr << 16) | (ng << 8) | nb;
#endif
}
6062
6063static inline void alphargbblend_argb32(quint32 *dst, uint coverage, const QRgba64 &srcLinear, quint32 src, const QColorTrcLut *colorProfile)
6064{
6065 if (coverage == 0xff000000) {
6066 // nothing
6067 } else if (coverage == 0xffffffff && qAlpha(rgb: src) == 255) {
6068 blend_pixel(dst&: *dst, src);
6069 } else if (*dst < 0xff000000) {
6070 // Give up and do a naive gray alphablend. Needed to deal with ARGB32 and invalid ARGB32_premultiplied, see QTBUG-60571
6071 blend_pixel(dst&: *dst, src, const_alpha: qRgbAvg(rgb: coverage));
6072 } else if (!colorProfile) {
6073 // First do naive blend with text-color
6074 QRgb s = *dst;
6075 blend_pixel(dst&: s, src);
6076 // Then a naive blend with glyph shape
6077 *dst = rgbBlend(d: *dst, s, rgbAlpha: coverage);
6078 } else if (srcLinear.isOpaque()) {
6079 rgbBlendPixel(dst, coverage, slinear: srcLinear, colorProfile);
6080 } else {
6081 // First do naive blend with text-color
6082 QRgb s = *dst;
6083 blend_pixel(dst&: s, src);
6084 // Then gamma-corrected blend with glyph shape
6085 QRgba64 s64 = colorProfile ? colorProfile->toLinear64(rgb32: s) : QRgba64::fromArgb32(rgb: s);
6086 rgbBlendPixel(dst, coverage, slinear: s64, colorProfile);
6087 }
6088}
6089
6090#if QT_CONFIG(raster_64bit)
6091static inline void rgbBlendPixel(QRgba64 &dst, int coverage, QRgba64 slinear, const QColorTrcLut *colorProfile)
6092{
6093 // Do a gammacorrected RGB alphablend...
6094 const QRgba64 dlinear = colorProfile ? colorProfile->toLinear64(rgb32: dst) : dst;
6095
6096 QRgba64 blend = rgbBlend(d: dlinear, s: slinear, rgbAlpha: coverage);
6097
6098 dst = colorProfile ? colorProfile->fromLinear(rgb64: blend) : blend;
6099}
6100
6101static inline void alphargbblend_generic(uint coverage, QRgba64 *dest, int x, const QRgba64 &srcLinear, const QRgba64 &src, const QColorTrcLut *colorProfile)
6102{
6103 if (coverage == 0xff000000) {
6104 // nothing
6105 } else if (coverage == 0xffffffff) {
6106 blend_pixel(dst&: dest[x], src);
6107 } else if (!dest[x].isOpaque()) {
6108 // Do a gray alphablend.
6109 alphamapblend_generic(coverage: qRgbAvg(rgb: coverage), dest, x, srcLinear, src, colorProfile);
6110 } else if (src.isOpaque()) {
6111 rgbBlendPixel(dst&: dest[x], coverage, slinear: srcLinear, colorProfile);
6112 } else {
6113 // First do naive blend with text-color
6114 QRgba64 s = dest[x];
6115 blend_pixel(dst&: s, src);
6116 // Then gamma-corrected blend with glyph shape
6117 if (colorProfile)
6118 s = colorProfile->toLinear(rgb64: s);
6119 rgbBlendPixel(dst&: dest[x], coverage, slinear: s, colorProfile);
6120 }
6121}
6122
6123static void qt_alphargbblit_generic(QRasterBuffer *rasterBuffer,
6124 int x, int y, const QRgba64 &color,
6125 const uint *src, int mapWidth, int mapHeight, int srcStride,
6126 const QClipData *clip, bool useGammaCorrection)
6127{
6128 if (color.isTransparent())
6129 return;
6130
6131 const QColorTrcLut *colorProfile = nullptr;
6132
6133 if (useGammaCorrection)
6134 colorProfile = QGuiApplicationPrivate::instance()->colorProfileForA32Text();
6135
6136 QRgba64 srcColor = color;
6137 if (colorProfile && color.isOpaque())
6138 srcColor = colorProfile->toLinear(rgb64: srcColor);
6139
6140 alignas(8) QRgba64 buffer[BufferSize];
6141 const DestFetchProc64 destFetch64 = destFetchProc64[rasterBuffer->format];
6142 const DestStoreProc64 destStore64 = destStoreProc64[rasterBuffer->format];
6143
6144 if (!clip) {
6145 for (int ly = 0; ly < mapHeight; ++ly) {
6146 int i = x;
6147 int length = mapWidth;
6148 while (length > 0) {
6149 int l = qMin(a: BufferSize, b: length);
6150 QRgba64 *dest = destFetch64(buffer, rasterBuffer, i, y + ly, l);
6151 for (int j=0; j < l; ++j) {
6152 const uint coverage = src[j + (i - x)];
6153 alphargbblend_generic(coverage, dest, x: j, srcLinear: srcColor, src: color, colorProfile);
6154 }
6155 if (destStore64)
6156 destStore64(rasterBuffer, i, y + ly, dest, l);
6157 length -= l;
6158 i += l;
6159 }
6160 src += srcStride;
6161 }
6162 } else {
6163 int bottom = qMin(a: y + mapHeight, b: rasterBuffer->height());
6164
6165 int top = qMax(a: y, b: 0);
6166 src += (top - y) * srcStride;
6167
6168 const_cast<QClipData *>(clip)->initialize();
6169 for (int yp = top; yp<bottom; ++yp) {
6170 const QClipData::ClipLine &line = clip->m_clipLines[yp];
6171
6172 for (int i=0; i<line.count; ++i) {
6173 const QSpan &clip = line.spans[i];
6174
6175 int start = qMax<int>(a: x, b: clip.x);
6176 int end = qMin<int>(a: x + mapWidth, b: clip.x + clip.len);
6177 if (end <= start)
6178 continue;
6179 Q_ASSERT(end - start <= BufferSize);
6180 QRgba64 *dest = destFetch64(buffer, rasterBuffer, start, clip.y, end - start);
6181
6182 for (int xp=start; xp<end; ++xp) {
6183 const uint coverage = src[xp - x];
6184 alphargbblend_generic(coverage, dest, x: xp - start, srcLinear: srcColor, src: color, colorProfile);
6185 }
6186 if (destStore64)
6187 destStore64(rasterBuffer, start, clip.y, dest, end - start);
6188 } // for (i -> line.count)
6189 src += srcStride;
6190 } // for (yp -> bottom)
6191 }
6192}
6193#else
6194static void qt_alphargbblit_generic(QRasterBuffer *rasterBuffer,
6195 int x, int y, const QRgba64 &color,
6196 const uint *src, int mapWidth, int mapHeight, int srcStride,
6197 const QClipData *clip, bool useGammaCorrection)
6198{
6199 if (color.isTransparent())
6200 return;
6201
6202 const quint32 c = color.toArgb32();
6203
6204 const QColorTrcLut *colorProfile = nullptr;
6205
6206 if (useGammaCorrection)
6207 colorProfile = QGuiApplicationPrivate::instance()->colorProfileForA32Text();
6208
6209 QRgba64 srcColor = color;
6210 if (colorProfile && color.isOpaque())
6211 srcColor = colorProfile->toLinear(srcColor);
6212
6213 quint32 buffer[BufferSize];
6214 const DestFetchProc destFetch = destFetchProc[rasterBuffer->format];
6215 const DestStoreProc destStore = destStoreProc[rasterBuffer->format];
6216
6217 if (!clip) {
6218 for (int ly = 0; ly < mapHeight; ++ly) {
6219 int i = x;
6220 int length = mapWidth;
6221 while (length > 0) {
6222 int l = qMin(BufferSize, length);
6223 quint32 *dest = destFetch(buffer, rasterBuffer, i, y + ly, l);
6224 for (int j=0; j < l; ++j) {
6225 const uint coverage = src[j + (i - x)];
6226 alphargbblend_argb32(dest + j, coverage, srcColor, c, colorProfile);
6227 }
6228 if (destStore)
6229 destStore(rasterBuffer, i, y + ly, dest, l);
6230 length -= l;
6231 i += l;
6232 }
6233 src += srcStride;
6234 }
6235 } else {
6236 int bottom = qMin(y + mapHeight, rasterBuffer->height());
6237
6238 int top = qMax(y, 0);
6239 src += (top - y) * srcStride;
6240
6241 const_cast<QClipData *>(clip)->initialize();
6242 for (int yp = top; yp<bottom; ++yp) {
6243 const QClipData::ClipLine &line = clip->m_clipLines[yp];
6244
6245 for (int i=0; i<line.count; ++i) {
6246 const QSpan &clip = line.spans[i];
6247
6248 int start = qMax<int>(x, clip.x);
6249 int end = qMin<int>(x + mapWidth, clip.x + clip.len);
6250 if (end <= start)
6251 continue;
6252 Q_ASSERT(end - start <= BufferSize);
6253 quint32 *dest = destFetch(buffer, rasterBuffer, start, clip.y, end - start);
6254
6255 for (int xp=start; xp<end; ++xp) {
6256 const uint coverage = src[xp - x];
6257 alphargbblend_argb32(dest + xp - start, coverage, srcColor, c, colorProfile);
6258 }
6259 if (destStore)
6260 destStore(rasterBuffer, start, clip.y, dest, end - start);
6261 } // for (i -> line.count)
6262 src += srcStride;
6263 } // for (yp -> bottom)
6264 }
6265}
6266#endif
6267
6268static void qt_alphargbblit_argb32(QRasterBuffer *rasterBuffer,
6269 int x, int y, const QRgba64 &color,
6270 const uint *src, int mapWidth, int mapHeight, int srcStride,
6271 const QClipData *clip, bool useGammaCorrection)
6272{
6273 if (color.isTransparent())
6274 return;
6275
6276 const quint32 c = color.toArgb32();
6277
6278 const QColorTrcLut *colorProfile = nullptr;
6279
6280 if (useGammaCorrection)
6281 colorProfile = QGuiApplicationPrivate::instance()->colorProfileForA32Text();
6282
6283 QRgba64 srcColor = color;
6284 if (colorProfile && color.isOpaque())
6285 srcColor = colorProfile->toLinear(rgb64: srcColor);
6286
6287 if (!clip) {
6288 quint32 *dst = reinterpret_cast<quint32*>(rasterBuffer->scanLine(y)) + x;
6289 const int destStride = rasterBuffer->stride<quint32>();
6290 while (mapHeight--) {
6291 for (int i = 0; i < mapWidth; ++i) {
6292 const uint coverage = src[i];
6293 alphargbblend_argb32(dst: dst + i, coverage, srcLinear: srcColor, src: c, colorProfile);
6294 }
6295
6296 dst += destStride;
6297 src += srcStride;
6298 }
6299 } else {
6300 int bottom = qMin(a: y + mapHeight, b: rasterBuffer->height());
6301
6302 int top = qMax(a: y, b: 0);
6303 src += (top - y) * srcStride;
6304
6305 const_cast<QClipData *>(clip)->initialize();
6306 for (int yp = top; yp<bottom; ++yp) {
6307 const QClipData::ClipLine &line = clip->m_clipLines[yp];
6308
6309 quint32 *dst = reinterpret_cast<quint32 *>(rasterBuffer->scanLine(y: yp));
6310
6311 for (int i=0; i<line.count; ++i) {
6312 const QSpan &clip = line.spans[i];
6313
6314 int start = qMax<int>(a: x, b: clip.x);
6315 int end = qMin<int>(a: x + mapWidth, b: clip.x + clip.len);
6316
6317 for (int xp=start; xp<end; ++xp) {
6318 const uint coverage = src[xp - x];
6319 alphargbblend_argb32(dst: dst + xp, coverage, srcLinear: srcColor, src: c, colorProfile);
6320 }
6321 } // for (i -> line.count)
6322 src += srcStride;
6323 } // for (yp -> bottom)
6324
6325 }
6326}
6327
6328static void qt_rectfill_argb32(QRasterBuffer *rasterBuffer,
6329 int x, int y, int width, int height,
6330 const QRgba64 &color)
6331{
6332 qt_rectfill<quint32>(dest: reinterpret_cast<quint32 *>(rasterBuffer->buffer()),
6333 value: color.toArgb32(), x, y, width, height, stride: rasterBuffer->bytesPerLine());
6334}
6335
6336static void qt_rectfill_quint16(QRasterBuffer *rasterBuffer,
6337 int x, int y, int width, int height,
6338 const QRgba64 &color)
6339{
6340 const QPixelLayout &layout = qPixelLayouts[rasterBuffer->format];
6341 quint32 c32 = color.toArgb32();
6342 quint16 c16;
6343 layout.storeFromARGB32PM(reinterpret_cast<uchar *>(&c16), &c32, 0, 1, nullptr, nullptr);
6344 qt_rectfill<quint16>(dest: reinterpret_cast<quint16 *>(rasterBuffer->buffer()),
6345 value: c16, x, y, width, height, stride: rasterBuffer->bytesPerLine());
6346}
6347
6348static void qt_rectfill_quint24(QRasterBuffer *rasterBuffer,
6349 int x, int y, int width, int height,
6350 const QRgba64 &color)
6351{
6352 const QPixelLayout &layout = qPixelLayouts[rasterBuffer->format];
6353 quint32 c32 = color.toArgb32();
6354 quint24 c24;
6355 layout.storeFromARGB32PM(reinterpret_cast<uchar *>(&c24), &c32, 0, 1, nullptr, nullptr);
6356 qt_rectfill<quint24>(dest: reinterpret_cast<quint24 *>(rasterBuffer->buffer()),
6357 value: c24, x, y, width, height, stride: rasterBuffer->bytesPerLine());
6358}
6359
6360static void qt_rectfill_nonpremul_argb32(QRasterBuffer *rasterBuffer,
6361 int x, int y, int width, int height,
6362 const QRgba64 &color)
6363{
6364 qt_rectfill<quint32>(dest: reinterpret_cast<quint32 *>(rasterBuffer->buffer()),
6365 value: color.unpremultiplied().toArgb32(), x, y, width, height, stride: rasterBuffer->bytesPerLine());
6366}
6367
6368static void qt_rectfill_rgba(QRasterBuffer *rasterBuffer,
6369 int x, int y, int width, int height,
6370 const QRgba64 &color)
6371{
6372 qt_rectfill<quint32>(dest: reinterpret_cast<quint32 *>(rasterBuffer->buffer()),
6373 value: ARGB2RGBA(x: color.toArgb32()), x, y, width, height, stride: rasterBuffer->bytesPerLine());
6374}
6375
6376static void qt_rectfill_nonpremul_rgba(QRasterBuffer *rasterBuffer,
6377 int x, int y, int width, int height,
6378 const QRgba64 &color)
6379{
6380 qt_rectfill<quint32>(dest: reinterpret_cast<quint32 *>(rasterBuffer->buffer()),
6381 value: ARGB2RGBA(x: color.unpremultiplied().toArgb32()), x, y, width, height, stride: rasterBuffer->bytesPerLine());
6382}
6383
6384template<QtPixelOrder PixelOrder>
6385static void qt_rectfill_rgb30(QRasterBuffer *rasterBuffer,
6386 int x, int y, int width, int height,
6387 const QRgba64 &color)
6388{
6389 qt_rectfill<quint32>(reinterpret_cast<quint32 *>(rasterBuffer->buffer()),
6390 qConvertRgb64ToRgb30<PixelOrder>(color), x, y, width, height, rasterBuffer->bytesPerLine());
6391}
6392
6393static void qt_rectfill_alpha(QRasterBuffer *rasterBuffer,
6394 int x, int y, int width, int height,
6395 const QRgba64 &color)
6396{
6397 qt_rectfill<quint8>(dest: reinterpret_cast<quint8 *>(rasterBuffer->buffer()),
6398 value: color.alpha() >> 8, x, y, width, height, stride: rasterBuffer->bytesPerLine());
6399}
6400
6401static void qt_rectfill_gray(QRasterBuffer *rasterBuffer,
6402 int x, int y, int width, int height,
6403 const QRgba64 &color)
6404{
6405 qt_rectfill<quint8>(dest: reinterpret_cast<quint8 *>(rasterBuffer->buffer()),
6406 value: qGray(rgb: color.toArgb32()), x, y, width, height, stride: rasterBuffer->bytesPerLine());
6407}
6408
6409static void qt_rectfill_quint64(QRasterBuffer *rasterBuffer,
6410 int x, int y, int width, int height,
6411 const QRgba64 &color)
6412{
6413 const auto store = qStoreFromRGBA64PM[rasterBuffer->format];
6414 quint64 c64;
6415 store(reinterpret_cast<uchar *>(&c64), &color, 0, 1, nullptr, nullptr);
6416 qt_rectfill<quint64>(dest: reinterpret_cast<quint64 *>(rasterBuffer->buffer()),
6417 value: c64, x, y, width, height, stride: rasterBuffer->bytesPerLine());
6418}
6419
// Map table for destination image format. Contains function pointers
// for blends of various types onto the destination.
//
// The table is indexed by QImage::Format; the entries below are positional,
// one per format in the order named by the per-entry comments. Each entry
// lists, in order: blendColor, bitmapBlit, alphamapBlit, alphaRGBBlit and
// fillRect. A nullptr means no routine is registered for that operation here.

DrawHelper qDrawHelper[QImage::NImageFormats] =
{
    // Format_Invalid,
    { .blendColor: nullptr, .bitmapBlit: nullptr, .alphamapBlit: nullptr, .alphaRGBBlit: nullptr, .fillRect: nullptr },
    // Format_Mono,
    {
        .blendColor: blend_color_generic,
        .bitmapBlit: nullptr, .alphamapBlit: nullptr, .alphaRGBBlit: nullptr, .fillRect: nullptr
    },
    // Format_MonoLSB,
    {
        .blendColor: blend_color_generic,
        .bitmapBlit: nullptr, .alphamapBlit: nullptr, .alphaRGBBlit: nullptr, .fillRect: nullptr
    },
    // Format_Indexed8,
    {
        .blendColor: blend_color_generic,
        .bitmapBlit: nullptr, .alphamapBlit: nullptr, .alphaRGBBlit: nullptr, .fillRect: nullptr
    },
    // Format_RGB32,
    {
        .blendColor: blend_color_argb,
        .bitmapBlit: qt_bitmapblit_argb32,
        .alphamapBlit: qt_alphamapblit_argb32,
        .alphaRGBBlit: qt_alphargbblit_argb32,
        .fillRect: qt_rectfill_argb32
    },
    // Format_ARGB32,
    {
        .blendColor: blend_color_generic,
        .bitmapBlit: qt_bitmapblit_argb32,
        .alphamapBlit: qt_alphamapblit_argb32,
        .alphaRGBBlit: qt_alphargbblit_argb32,
        .fillRect: qt_rectfill_nonpremul_argb32
    },
    // Format_ARGB32_Premultiplied
    {
        .blendColor: blend_color_argb,
        .bitmapBlit: qt_bitmapblit_argb32,
        .alphamapBlit: qt_alphamapblit_argb32,
        .alphaRGBBlit: qt_alphargbblit_argb32,
        .fillRect: qt_rectfill_argb32
    },
    // Format_RGB16
    {
        .blendColor: blend_color_rgb16,
        .bitmapBlit: qt_bitmapblit_quint16,
        .alphamapBlit: qt_alphamapblit_quint16,
        .alphaRGBBlit: qt_alphargbblit_generic,
        .fillRect: qt_rectfill_quint16
    },
    // Format_ARGB8565_Premultiplied
    {
        .blendColor: blend_color_generic,
        .bitmapBlit: nullptr,
        .alphamapBlit: qt_alphamapblit_generic,
        .alphaRGBBlit: qt_alphargbblit_generic,
        .fillRect: qt_rectfill_quint24
    },
    // Format_RGB666
    {
        .blendColor: blend_color_generic,
        .bitmapBlit: nullptr,
        .alphamapBlit: qt_alphamapblit_generic,
        .alphaRGBBlit: qt_alphargbblit_generic,
        .fillRect: qt_rectfill_quint24
    },
    // Format_ARGB6666_Premultiplied
    {
        .blendColor: blend_color_generic,
        .bitmapBlit: nullptr,
        .alphamapBlit: qt_alphamapblit_generic,
        .alphaRGBBlit: qt_alphargbblit_generic,
        .fillRect: qt_rectfill_quint24
    },
    // Format_RGB555
    {
        .blendColor: blend_color_generic,
        .bitmapBlit: nullptr,
        .alphamapBlit: qt_alphamapblit_generic,
        .alphaRGBBlit: qt_alphargbblit_generic,
        .fillRect: qt_rectfill_quint16
    },
    // Format_ARGB8555_Premultiplied
    {
        .blendColor: blend_color_generic,
        .bitmapBlit: nullptr,
        .alphamapBlit: qt_alphamapblit_generic,
        .alphaRGBBlit: qt_alphargbblit_generic,
        .fillRect: qt_rectfill_quint24
    },
    // Format_RGB888
    {
        .blendColor: blend_color_generic,
        .bitmapBlit: nullptr,
        .alphamapBlit: qt_alphamapblit_generic,
        .alphaRGBBlit: qt_alphargbblit_generic,
        .fillRect: qt_rectfill_quint24
    },
    // Format_RGB444
    {
        .blendColor: blend_color_generic,
        .bitmapBlit: nullptr,
        .alphamapBlit: qt_alphamapblit_generic,
        .alphaRGBBlit: qt_alphargbblit_generic,
        .fillRect: qt_rectfill_quint16
    },
    // Format_ARGB4444_Premultiplied
    {
        .blendColor: blend_color_generic,
        .bitmapBlit: nullptr,
        .alphamapBlit: qt_alphamapblit_generic,
        .alphaRGBBlit: qt_alphargbblit_generic,
        .fillRect: qt_rectfill_quint16
    },
    // Format_RGBX8888
    {
        .blendColor: blend_color_generic,
        .bitmapBlit: qt_bitmapblit_rgba8888,
        .alphamapBlit: qt_alphamapblit_generic,
        .alphaRGBBlit: qt_alphargbblit_generic,
        .fillRect: qt_rectfill_rgba
    },
    // Format_RGBA8888
    {
        .blendColor: blend_color_generic,
        .bitmapBlit: qt_bitmapblit_rgba8888,
        .alphamapBlit: qt_alphamapblit_generic,
        .alphaRGBBlit: qt_alphargbblit_generic,
        .fillRect: qt_rectfill_nonpremul_rgba
    },
    // Format_RGBA8888_Premultiplied
    {
        .blendColor: blend_color_generic,
        .bitmapBlit: qt_bitmapblit_rgba8888,
        .alphamapBlit: qt_alphamapblit_generic,
        .alphaRGBBlit: qt_alphargbblit_generic,
        .fillRect: qt_rectfill_rgba
    },
    // Format_BGR30
    {
        .blendColor: blend_color_generic_rgb64,
        .bitmapBlit: qt_bitmapblit_rgb30<PixelOrderBGR>,
        .alphamapBlit: qt_alphamapblit_generic,
        .alphaRGBBlit: qt_alphargbblit_generic,
        .fillRect: qt_rectfill_rgb30<PixelOrderBGR>
    },
    // Format_A2BGR30_Premultiplied
    {
        .blendColor: blend_color_generic_rgb64,
        .bitmapBlit: qt_bitmapblit_rgb30<PixelOrderBGR>,
        .alphamapBlit: qt_alphamapblit_generic,
        .alphaRGBBlit: qt_alphargbblit_generic,
        .fillRect: qt_rectfill_rgb30<PixelOrderBGR>
    },
    // Format_RGB30
    {
        .blendColor: blend_color_generic_rgb64,
        .bitmapBlit: qt_bitmapblit_rgb30<PixelOrderRGB>,
        .alphamapBlit: qt_alphamapblit_generic,
        .alphaRGBBlit: qt_alphargbblit_generic,
        .fillRect: qt_rectfill_rgb30<PixelOrderRGB>
    },
    // Format_A2RGB30_Premultiplied
    {
        .blendColor: blend_color_generic_rgb64,
        .bitmapBlit: qt_bitmapblit_rgb30<PixelOrderRGB>,
        .alphamapBlit: qt_alphamapblit_generic,
        .alphaRGBBlit: qt_alphargbblit_generic,
        .fillRect: qt_rectfill_rgb30<PixelOrderRGB>
    },
    // Format_Alpha8
    {
        .blendColor: blend_color_generic,
        .bitmapBlit: nullptr,
        .alphamapBlit: qt_alphamapblit_generic,
        .alphaRGBBlit: qt_alphargbblit_generic,
        .fillRect: qt_rectfill_alpha
    },
    // Format_Grayscale8
    {
        .blendColor: blend_color_generic,
        .bitmapBlit: nullptr,
        .alphamapBlit: qt_alphamapblit_generic,
        .alphaRGBBlit: qt_alphargbblit_generic,
        .fillRect: qt_rectfill_gray
    },
    // Format_RGBX64
    {
        .blendColor: blend_color_generic_rgb64,
        .bitmapBlit: nullptr,
        .alphamapBlit: qt_alphamapblit_generic,
        .alphaRGBBlit: qt_alphargbblit_generic,
        .fillRect: qt_rectfill_quint64
    },
    // Format_RGBA64
    {
        .blendColor: blend_color_generic_rgb64,
        .bitmapBlit: nullptr,
        .alphamapBlit: qt_alphamapblit_generic,
        .alphaRGBBlit: qt_alphargbblit_generic,
        .fillRect: qt_rectfill_quint64
    },
    // Format_RGBA64_Premultiplied
    {
        .blendColor: blend_color_generic_rgb64,
        .bitmapBlit: nullptr,
        .alphamapBlit: qt_alphamapblit_generic,
        .alphaRGBBlit: qt_alphargbblit_generic,
        .fillRect: qt_rectfill_quint64
    },
    // Format_Grayscale16
    {
        .blendColor: blend_color_generic_rgb64,
        .bitmapBlit: nullptr,
        .alphamapBlit: qt_alphamapblit_generic,
        .alphaRGBBlit: qt_alphargbblit_generic,
        .fillRect: qt_rectfill_quint16
    },
    // Format_BGR888
    {
        .blendColor: blend_color_generic,
        .bitmapBlit: nullptr,
        .alphamapBlit: qt_alphamapblit_generic,
        .alphaRGBBlit: qt_alphargbblit_generic,
        .fillRect: qt_rectfill_quint24
    },
};
6651
6652#if !defined(__SSE2__)
// Portable 64-bit fill, compiled only when __SSE2__ is not defined: writes
// count copies of color starting at dest via qt_memfill_template().
void qt_memfill64(quint64 *dest, quint64 color, qsizetype count)
{
    qt_memfill_template<quint64>(dest, color, count);
}
6657#endif
6658
6659#if defined(QT_COMPILER_SUPPORTS_SSSE3) && defined(Q_CC_GNU) && !defined(Q_CC_INTEL) && !defined(Q_CC_CLANG)
6660__attribute__((optimize("no-tree-vectorize")))
6661#endif
6662void qt_memfill24(quint24 *dest, quint24 color, qsizetype count)
6663{
6664# ifdef QT_COMPILER_SUPPORTS_SSSE3
6665 extern void qt_memfill24_ssse3(quint24 *, quint24, qsizetype);
6666 if (qCpuHasFeature(SSSE3))
6667 return qt_memfill24_ssse3(dest, color, count);
6668# endif
6669
6670 const quint32 v = color;
6671 quint24 *end = dest + count;
6672
6673 // prolog: align dest to 32bit
6674 while ((quintptr(dest) & 0x3) && dest < end) {
6675 *dest++ = v;
6676 }
6677 if (dest >= end)
6678 return;
6679
6680 const uint val1 = qFromBigEndian(source: (v << 8) | (v >> 16));
6681 const uint val2 = qFromBigEndian(source: (v << 16) | (v >> 8));
6682 const uint val3 = qFromBigEndian(source: (v << 24) | (v >> 0));
6683
6684 for ( ; dest <= (end - 4); dest += 4) {
6685 quint32 *dst = reinterpret_cast<quint32 *>(dest);
6686 dst[0] = val1;
6687 dst[1] = val2;
6688 dst[2] = val3;
6689 }
6690
6691 // less than 4px left
6692 switch (end - dest) {
6693 case 3:
6694 *dest++ = v;
6695 Q_FALLTHROUGH();
6696 case 2:
6697 *dest++ = v;
6698 Q_FALLTHROUGH();
6699 case 1:
6700 *dest++ = v;
6701 }
6702}
6703
6704void qt_memfill16(quint16 *dest, quint16 value, qsizetype count)
6705{
6706 const int align = quintptr(dest) & 0x3;
6707 if (align) {
6708 *dest++ = value;
6709 --count;
6710 }
6711
6712 if (count & 0x1)
6713 dest[count - 1] = value;
6714
6715 const quint32 value32 = (value << 16) | value;
6716 qt_memfill32(reinterpret_cast<quint32*>(dest), value32, count / 2);
6717}
6718
6719#if !defined(__SSE2__) && !defined(__ARM_NEON__) && !defined(__MIPS_DSP__)
// Portable 32-bit fill, compiled only when none of __SSE2__, __ARM_NEON__ and
// __MIPS_DSP__ is defined: writes count copies of color starting at dest via
// qt_memfill_template().
void qt_memfill32(quint32 *dest, quint32 color, qsizetype count)
{
    qt_memfill_template<quint32>(dest, color, count);
}
6724#endif
6725#ifdef __SSE2__
// In SSE2 builds qt_memfill32/qt_memfill64 are function pointers, null until
// set up. qInitDrawhelperFunctions() points them at the _sse2 implementations
// when __AVX2__ is not defined.
decltype(qt_memfill32_sse2) *qt_memfill32 = nullptr;
decltype(qt_memfill64_sse2) *qt_memfill64 = nullptr;
6728#endif
6729
6730#ifdef QT_COMPILER_SUPPORTS_SSE4_1
// Declaration only; the SSE4.1 implementation is presumably defined in another
// translation unit - confirm against the build.
template<QtPixelOrder> void QT_FASTCALL storeA2RGB30PMFromARGB32PM_sse4(uchar *dest, const uint *src, int index, int count, const QVector<QRgb> *, QDitherInfo *);
6732#endif
6733
// Declared here, not defined in this part of the file; it is the first call
// made by qInitDrawhelperFunctions().
extern void qInitBlendFunctions();
6735
6736static void qInitDrawhelperFunctions()
6737{
6738 // Set up basic blend function tables.
6739 qInitBlendFunctions();
6740
6741#ifdef __SSE2__
6742# ifndef __AVX2__
6743 qt_memfill32 = qt_memfill32_sse2;
6744 qt_memfill64 = qt_memfill64_sse2;
6745# endif
6746 qDrawHelper[QImage::Format_RGB32].bitmapBlit = qt_bitmapblit32_sse2;
6747 qDrawHelper[QImage::Format_ARGB32].bitmapBlit = qt_bitmapblit32_sse2;
6748 qDrawHelper[QImage::Format_ARGB32_Premultiplied].bitmapBlit = qt_bitmapblit32_sse2;
6749 qDrawHelper[QImage::Format_RGB16].bitmapBlit = qt_bitmapblit16_sse2;
6750 qDrawHelper[QImage::Format_RGBX8888].bitmapBlit = qt_bitmapblit8888_sse2;
6751 qDrawHelper[QImage::Format_RGBA8888].bitmapBlit = qt_bitmapblit8888_sse2;
6752 qDrawHelper[QImage::Format_RGBA8888_Premultiplied].bitmapBlit = qt_bitmapblit8888_sse2;
6753
6754 extern void qt_scale_image_argb32_on_argb32_sse2(uchar *destPixels, int dbpl,
6755 const uchar *srcPixels, int sbpl, int srch,
6756 const QRectF &targetRect,
6757 const QRectF &sourceRect,
6758 const QRect &clip,
6759 int const_alpha);
6760 qScaleFunctions[QImage::Format_ARGB32_Premultiplied][QImage::Format_ARGB32_Premultiplied] = qt_scale_image_argb32_on_argb32_sse2;
6761 qScaleFunctions[QImage::Format_RGB32][QImage::Format_ARGB32_Premultiplied] = qt_scale_image_argb32_on_argb32_sse2;
6762 qScaleFunctions[QImage::Format_RGBA8888_Premultiplied][QImage::Format_RGBA8888_Premultiplied] = qt_scale_image_argb32_on_argb32_sse2;
6763 qScaleFunctions[QImage::Format_RGBX8888][QImage::Format_RGBA8888_Premultiplied] = qt_scale_image_argb32_on_argb32_sse2;
6764
6765 extern void qt_blend_rgb32_on_rgb32_sse2(uchar *destPixels, int dbpl,
6766 const uchar *srcPixels, int sbpl,
6767 int w, int h,
6768 int const_alpha);
6769 extern void qt_blend_argb32_on_argb32_sse2(uchar *destPixels, int dbpl,
6770 const uchar *srcPixels, int sbpl,
6771 int w, int h,
6772 int const_alpha);
6773
6774 qBlendFunctions[QImage::Format_RGB32][QImage::Format_RGB32] = qt_blend_rgb32_on_rgb32_sse2;
6775 qBlendFunctions[QImage::Format_ARGB32_Premultiplied][QImage::Format_RGB32] = qt_blend_rgb32_on_rgb32_sse2;
6776 qBlendFunctions[QImage::Format_RGB32][QImage::Format_ARGB32_Premultiplied] = qt_blend_argb32_on_argb32_sse2;
6777 qBlendFunctions[QImage::Format_ARGB32_Premultiplied][QImage::Format_ARGB32_Premultiplied] = qt_blend_argb32_on_argb32_sse2;
6778 qBlendFunctions[QImage::Format_RGBX8888][QImage::Format_RGBX8888] = qt_blend_rgb32_on_rgb32_sse2;
6779 qBlendFunctions[QImage::Format_RGBA8888_Premultiplied][QImage::Format_RGBX8888] = qt_blend_rgb32_on_rgb32_sse2;
6780 qBlendFunctions[QImage::Format_RGBX8888][QImage::Format_RGBA8888_Premultiplied] = qt_blend_argb32_on_argb32_sse2;
6781 qBlendFunctions[QImage::Format_RGBA8888_Premultiplied][QImage::Format_RGBA8888_Premultiplied] = qt_blend_argb32_on_argb32_sse2;
6782
6783 extern const uint * QT_FASTCALL qt_fetch_radial_gradient_sse2(uint *buffer, const Operator *op, const QSpanData *data,
6784 int y, int x, int length);
6785
6786 qt_fetch_radial_gradient = qt_fetch_radial_gradient_sse2;
6787
6788 extern void QT_FASTCALL comp_func_SourceOver_sse2(uint *destPixels, const uint *srcPixels, int length, uint const_alpha);
6789 extern void QT_FASTCALL comp_func_solid_SourceOver_sse2(uint *destPixels, int length, uint color, uint const_alpha);
6790 extern void QT_FASTCALL comp_func_Source_sse2(uint *destPixels, const uint *srcPixels, int length, uint const_alpha);
6791 extern void QT_FASTCALL comp_func_solid_Source_sse2(uint *destPixels, int length, uint color, uint const_alpha);
6792 extern void QT_FASTCALL comp_func_Plus_sse2(uint *destPixels, const uint *srcPixels, int length, uint const_alpha);
6793 qt_functionForMode_C[QPainter::CompositionMode_SourceOver] = comp_func_SourceOver_sse2;
6794 qt_functionForModeSolid_C[QPainter::CompositionMode_SourceOver] = comp_func_solid_SourceOver_sse2;
6795 qt_functionForMode_C[QPainter::CompositionMode_Source] = comp_func_Source_sse2;
6796 qt_functionForModeSolid_C[QPainter::CompositionMode_Source] = comp_func_solid_Source_sse2;
6797 qt_functionForMode_C[QPainter::CompositionMode_Plus] = comp_func_Plus_sse2;
6798
6799#ifdef QT_COMPILER_SUPPORTS_SSSE3
6800 if (qCpuHasFeature(SSSE3)) {
6801 extern void qt_blend_argb32_on_argb32_ssse3(uchar *destPixels, int dbpl,
6802 const uchar *srcPixels, int sbpl,
6803 int w, int h,
6804 int const_alpha);
6805
6806 extern const uint * QT_FASTCALL qt_fetchUntransformed_888_ssse3(uint *buffer, const Operator *, const QSpanData *data,
6807 int y, int x, int length);
6808 qBlendFunctions[QImage::Format_RGB32][QImage::Format_ARGB32_Premultiplied] = qt_blend_argb32_on_argb32_ssse3;
6809 qBlendFunctions[QImage::Format_ARGB32_Premultiplied][QImage::Format_ARGB32_Premultiplied] = qt_blend_argb32_on_argb32_ssse3;
6810 qBlendFunctions[QImage::Format_RGBX8888][QImage::Format_RGBA8888_Premultiplied] = qt_blend_argb32_on_argb32_ssse3;
6811 qBlendFunctions[QImage::Format_RGBA8888_Premultiplied][QImage::Format_RGBA8888_Premultiplied] = qt_blend_argb32_on_argb32_ssse3;
6812 sourceFetchUntransformed[QImage::Format_RGB888] = qt_fetchUntransformed_888_ssse3;
6813 extern void QT_FASTCALL rbSwap_888_ssse3(uchar *dst, const uchar *src, int count);
6814 qPixelLayouts[QImage::Format_RGB888].rbSwap = rbSwap_888_ssse3;
6815 qPixelLayouts[QImage::Format_BGR888].rbSwap = rbSwap_888_ssse3;
6816 }
6817#endif // SSSE3
6818
6819#if defined(QT_COMPILER_SUPPORTS_SSE4_1)
6820 if (qCpuHasFeature(SSE4_1)) {
6821 extern void QT_FASTCALL convertARGB32ToARGB32PM_sse4(uint *buffer, int count, const QVector<QRgb> *);
6822 extern void QT_FASTCALL convertRGBA8888ToARGB32PM_sse4(uint *buffer, int count, const QVector<QRgb> *);
6823 extern const uint *QT_FASTCALL fetchARGB32ToARGB32PM_sse4(uint *buffer, const uchar *src, int index, int count,
6824 const QVector<QRgb> *, QDitherInfo *);
6825 extern const uint *QT_FASTCALL fetchRGBA8888ToARGB32PM_sse4(uint *buffer, const uchar *src, int index, int count,
6826 const QVector<QRgb> *, QDitherInfo *);
6827 extern const QRgba64 * QT_FASTCALL convertARGB32ToRGBA64PM_sse4(QRgba64 *buffer, const uint *src, int count,
6828 const QVector<QRgb> *, QDitherInfo *);
6829 extern const QRgba64 * QT_FASTCALL convertRGBA8888ToRGBA64PM_sse4(QRgba64 *buffer, const uint *src, int count,
6830 const QVector<QRgb> *, QDitherInfo *);
6831 extern const QRgba64 *QT_FASTCALL fetchARGB32ToRGBA64PM_sse4(QRgba64 *buffer, const uchar *src, int index, int count,
6832 const QVector<QRgb> *, QDitherInfo *);
6833 extern const QRgba64 *QT_FASTCALL fetchRGBA8888ToRGBA64PM_sse4(QRgba64 *buffer, const uchar *src, int index, int count,
6834 const QVector<QRgb> *, QDitherInfo *);
6835 extern void QT_FASTCALL storeARGB32FromARGB32PM_sse4(uchar *dest, const uint *src, int index, int count,
6836 const QVector<QRgb> *, QDitherInfo *);
6837 extern void QT_FASTCALL storeRGBA8888FromARGB32PM_sse4(uchar *dest, const uint *src, int index, int count,
6838 const QVector<QRgb> *, QDitherInfo *);
6839 extern void QT_FASTCALL storeRGBXFromARGB32PM_sse4(uchar *dest, const uint *src, int index, int count,
6840 const QVector<QRgb> *, QDitherInfo *);
6841 extern void QT_FASTCALL storeARGB32FromRGBA64PM_sse4(uchar *dest, const QRgba64 *src, int index, int count,
6842 const QVector<QRgb> *, QDitherInfo *);
6843 extern void QT_FASTCALL storeRGBA8888FromRGBA64PM_sse4(uchar *dest, const QRgba64 *src, int index, int count,
6844 const QVector<QRgb> *, QDitherInfo *);
6845 extern void QT_FASTCALL destStore64ARGB32_sse4(QRasterBuffer *rasterBuffer, int x, int y, const QRgba64 *buffer, int length);
6846 extern void QT_FASTCALL destStore64RGBA8888_sse4(QRasterBuffer *rasterBuffer, int x, int y, const QRgba64 *buffer, int length);
6847# ifndef __AVX2__
6848 qPixelLayouts[QImage::Format_ARGB32].fetchToARGB32PM = fetchARGB32ToARGB32PM_sse4;
6849 qPixelLayouts[QImage::Format_ARGB32].convertToARGB32PM = convertARGB32ToARGB32PM_sse4;
6850 qPixelLayouts[QImage::Format_RGBA8888].fetchToARGB32PM = fetchRGBA8888ToARGB32PM_sse4;
6851 qPixelLayouts[QImage::Format_RGBA8888].convertToARGB32PM = convertRGBA8888ToARGB32PM_sse4;
6852 qPixelLayouts[QImage::Format_ARGB32].fetchToRGBA64PM = fetchARGB32ToRGBA64PM_sse4;
6853 qPixelLayouts[QImage::Format_ARGB32].convertToRGBA64PM = convertARGB32ToRGBA64PM_sse4;
6854 qPixelLayouts[QImage::Format_RGBA8888].fetchToRGBA64PM = fetchRGBA8888ToRGBA64PM_sse4;
6855 qPixelLayouts[QImage::Format_RGBA8888].convertToRGBA64PM = convertRGBA8888ToRGBA64PM_sse4;
6856 qPixelLayouts[QImage::Format_RGBX8888].fetchToRGBA64PM = fetchRGBA8888ToRGBA64PM_sse4;
6857 qPixelLayouts[QImage::Format_RGBX8888].convertToRGBA64PM = convertRGBA8888ToRGBA64PM_sse4;
6858# endif
6859 qPixelLayouts[QImage::Format_ARGB32].storeFromARGB32PM = storeARGB32FromARGB32PM_sse4;
6860 qPixelLayouts[QImage::Format_RGBA8888].storeFromARGB32PM = storeRGBA8888FromARGB32PM_sse4;
6861 qPixelLayouts[QImage::Format_RGBX8888].storeFromARGB32PM = storeRGBXFromARGB32PM_sse4;
6862 qPixelLayouts[QImage::Format_A2BGR30_Premultiplied].storeFromARGB32PM = storeA2RGB30PMFromARGB32PM_sse4<PixelOrderBGR>;
6863 qPixelLayouts[QImage::Format_A2RGB30_Premultiplied].storeFromARGB32PM = storeA2RGB30PMFromARGB32PM_sse4<PixelOrderRGB>;
6864 qStoreFromRGBA64PM[QImage::Format_ARGB32] = storeARGB32FromRGBA64PM_sse4;
6865 qStoreFromRGBA64PM[QImage::Format_RGBA8888] = storeRGBA8888FromRGBA64PM_sse4;
6866#if QT_CONFIG(raster_64bit)
6867 destStoreProc64[QImage::Format_ARGB32] = destStore64ARGB32_sse4;
6868 destStoreProc64[QImage::Format_RGBA8888] = destStore64RGBA8888_sse4;
6869#endif
6870 }
6871#endif
6872
6873#if defined(QT_COMPILER_SUPPORTS_AVX2)
6874 if (qCpuHasFeature(ArchHaswell)) {
6875 qt_memfill32 = qt_memfill32_avx2;
6876 qt_memfill64 = qt_memfill64_avx2;
6877 extern void qt_blend_rgb32_on_rgb32_avx2(uchar *destPixels, int dbpl,
6878 const uchar *srcPixels, int sbpl,
6879 int w, int h, int const_alpha);
6880 extern void qt_blend_argb32_on_argb32_avx2(uchar *destPixels, int dbpl,
6881 const uchar *srcPixels, int sbpl,
6882 int w, int h, int const_alpha);
6883 qBlendFunctions[QImage::Format_RGB32][QImage::Format_RGB32] = qt_blend_rgb32_on_rgb32_avx2;
6884 qBlendFunctions[QImage::Format_ARGB32_Premultiplied][QImage::Format_RGB32] = qt_blend_rgb32_on_rgb32_avx2;
6885 qBlendFunctions[QImage::Format_RGB32][QImage::Format_ARGB32_Premultiplied] = qt_blend_argb32_on_argb32_avx2;
6886 qBlendFunctions[QImage::Format_ARGB32_Premultiplied][QImage::Format_ARGB32_Premultiplied] = qt_blend_argb32_on_argb32_avx2;
6887 qBlendFunctions[QImage::Format_RGBX8888][QImage::Format_RGBX8888] = qt_blend_rgb32_on_rgb32_avx2;
6888 qBlendFunctions[QImage::Format_RGBA8888_Premultiplied][QImage::Format_RGBX8888] = qt_blend_rgb32_on_rgb32_avx2;
6889 qBlendFunctions[QImage::Format_RGBX8888][QImage::Format_RGBA8888_Premultiplied] = qt_blend_argb32_on_argb32_avx2;
6890 qBlendFunctions[QImage::Format_RGBA8888_Premultiplied][QImage::Format_RGBA8888_Premultiplied] = qt_blend_argb32_on_argb32_avx2;
6891
6892 extern void QT_FASTCALL comp_func_Source_avx2(uint *destPixels, const uint *srcPixels, int length, uint const_alpha);
6893 extern void QT_FASTCALL comp_func_SourceOver_avx2(uint *destPixels, const uint *srcPixels, int length, uint const_alpha);
6894 extern void QT_FASTCALL comp_func_solid_SourceOver_avx2(uint *destPixels, int length, uint color, uint const_alpha);
6895 qt_functionForMode_C[QPainter::CompositionMode_Source] = comp_func_Source_avx2;
6896 qt_functionForMode_C[QPainter::CompositionMode_SourceOver] = comp_func_SourceOver_avx2;
6897 qt_functionForModeSolid_C[QPainter::CompositionMode_SourceOver] = comp_func_solid_SourceOver_avx2;
6898#if QT_CONFIG(raster_64bit)
6899 extern void QT_FASTCALL comp_func_Source_rgb64_avx2(QRgba64 *destPixels, const QRgba64 *srcPixels, int length, uint const_alpha);
6900 extern void QT_FASTCALL comp_func_SourceOver_rgb64_avx2(QRgba64 *destPixels, const QRgba64 *srcPixels, int length, uint const_alpha);
6901 extern void QT_FASTCALL comp_func_solid_SourceOver_rgb64_avx2(QRgba64 *destPixels, int length, QRgba64 color, uint const_alpha);
6902 qt_functionForMode64_C[QPainter::CompositionMode_Source] = comp_func_Source_rgb64_avx2;
6903 qt_functionForMode64_C[QPainter::CompositionMode_SourceOver] = comp_func_SourceOver_rgb64_avx2;
6904 qt_functionForModeSolid64_C[QPainter::CompositionMode_SourceOver] = comp_func_solid_SourceOver_rgb64_avx2;
6905#endif
6906
6907 extern void QT_FASTCALL fetchTransformedBilinearARGB32PM_simple_scale_helper_avx2(uint *b, uint *end, const QTextureData &image,
6908 int &fx, int &fy, int fdx, int /*fdy*/);
6909 extern void QT_FASTCALL fetchTransformedBilinearARGB32PM_downscale_helper_avx2(uint *b, uint *end, const QTextureData &image,
6910 int &fx, int &fy, int fdx, int /*fdy*/);
6911 extern void QT_FASTCALL fetchTransformedBilinearARGB32PM_fast_rotate_helper_avx2(uint *b, uint *end, const QTextureData &image,
6912 int &fx, int &fy, int fdx, int fdy);
6913
6914 bilinearFastTransformHelperARGB32PM[0][SimpleScaleTransform] = fetchTransformedBilinearARGB32PM_simple_scale_helper_avx2;
6915 bilinearFastTransformHelperARGB32PM[0][DownscaleTransform] = fetchTransformedBilinearARGB32PM_downscale_helper_avx2;
6916 bilinearFastTransformHelperARGB32PM[0][FastRotateTransform] = fetchTransformedBilinearARGB32PM_fast_rotate_helper_avx2;
6917
6918 extern void QT_FASTCALL convertARGB32ToARGB32PM_avx2(uint *buffer, int count, const QVector<QRgb> *);
6919 extern void QT_FASTCALL convertRGBA8888ToARGB32PM_avx2(uint *buffer, int count, const QVector<QRgb> *);
6920 extern const uint *QT_FASTCALL fetchARGB32ToARGB32PM_avx2(uint *buffer, const uchar *src, int index, int count,
6921 const QVector<QRgb> *, QDitherInfo *);
6922 extern const uint *QT_FASTCALL fetchRGBA8888ToARGB32PM_avx2(uint *buffer, const uchar *src, int index, int count,
6923 const QVector<QRgb> *, QDitherInfo *);
6924 qPixelLayouts[QImage::Format_ARGB32].fetchToARGB32PM = fetchARGB32ToARGB32PM_avx2;
6925 qPixelLayouts[QImage::Format_ARGB32].convertToARGB32PM = convertARGB32ToARGB32PM_avx2;
6926 qPixelLayouts[QImage::Format_RGBA8888].fetchToARGB32PM = fetchRGBA8888ToARGB32PM_avx2;
6927 qPixelLayouts[QImage::Format_RGBA8888].convertToARGB32PM = convertRGBA8888ToARGB32PM_avx2;
6928
6929#if QT_CONFIG(raster_64bit)
6930 extern const QRgba64 * QT_FASTCALL convertARGB32ToRGBA64PM_avx2(QRgba64 *, const uint *, int, const QVector<QRgb> *, QDitherInfo *);
6931 extern const QRgba64 * QT_FASTCALL convertRGBA8888ToRGBA64PM_avx2(QRgba64 *, const uint *, int count, const QVector<QRgb> *, QDitherInfo *);
6932 extern const QRgba64 *QT_FASTCALL fetchARGB32ToRGBA64PM_avx2(QRgba64 *, const uchar *, int, int, const QVector<QRgb> *, QDitherInfo *);
6933 extern const QRgba64 *QT_FASTCALL fetchRGBA8888ToRGBA64PM_avx2(QRgba64 *, const uchar *, int, int, const QVector<QRgb> *, QDitherInfo *);
6934 qPixelLayouts[QImage::Format_ARGB32].convertToRGBA64PM = convertARGB32ToRGBA64PM_avx2;
6935 qPixelLayouts[QImage::Format_RGBX8888].convertToRGBA64PM = convertRGBA8888ToRGBA64PM_avx2;
6936 qPixelLayouts[QImage::Format_ARGB32].fetchToRGBA64PM = fetchARGB32ToRGBA64PM_avx2;
6937 qPixelLayouts[QImage::Format_RGBX8888].fetchToRGBA64PM = fetchRGBA8888ToRGBA64PM_avx2;
6938#endif
6939 }
6940#endif
6941
6942#endif // SSE2
6943
6944#if defined(__ARM_NEON__)
6945 qBlendFunctions[QImage::Format_RGB32][QImage::Format_RGB32] = qt_blend_rgb32_on_rgb32_neon;
6946 qBlendFunctions[QImage::Format_ARGB32_Premultiplied][QImage::Format_RGB32] = qt_blend_rgb32_on_rgb32_neon;
6947 qBlendFunctions[QImage::Format_RGB32][QImage::Format_ARGB32_Premultiplied] = qt_blend_argb32_on_argb32_neon;
6948 qBlendFunctions[QImage::Format_ARGB32_Premultiplied][QImage::Format_ARGB32_Premultiplied] = qt_blend_argb32_on_argb32_neon;
6949#if Q_BYTE_ORDER == Q_LITTLE_ENDIAN
6950 qBlendFunctions[QImage::Format_RGBX8888][QImage::Format_RGBX8888] = qt_blend_rgb32_on_rgb32_neon;
6951 qBlendFunctions[QImage::Format_RGBA8888_Premultiplied][QImage::Format_RGBX8888] = qt_blend_rgb32_on_rgb32_neon;
6952 qBlendFunctions[QImage::Format_RGBX8888][QImage::Format_RGBA8888_Premultiplied] = qt_blend_argb32_on_argb32_neon;
6953 qBlendFunctions[QImage::Format_RGBA8888_Premultiplied][QImage::Format_RGBA8888_Premultiplied] = qt_blend_argb32_on_argb32_neon;
6954#endif
6955
6956 qt_functionForMode_C[QPainter::CompositionMode_SourceOver] = qt_blend_argb32_on_argb32_scanline_neon;
6957 qt_functionForModeSolid_C[QPainter::CompositionMode_SourceOver] = comp_func_solid_SourceOver_neon;
6958 qt_functionForMode_C[QPainter::CompositionMode_Plus] = comp_func_Plus_neon;
6959
6960 extern const uint * QT_FASTCALL qt_fetch_radial_gradient_neon(uint *buffer, const Operator *op, const QSpanData *data,
6961 int y, int x, int length);
6962
6963 qt_fetch_radial_gradient = qt_fetch_radial_gradient_neon;
6964
6965 sourceFetchUntransformed[QImage::Format_RGB888] = qt_fetchUntransformed_888_neon;
6966
6967#if Q_BYTE_ORDER == Q_LITTLE_ENDIAN
6968 extern void QT_FASTCALL convertARGB32ToARGB32PM_neon(uint *buffer, int count, const QVector<QRgb> *);
6969 extern void QT_FASTCALL convertRGBA8888ToARGB32PM_neon(uint *buffer, int count, const QVector<QRgb> *);
6970 extern const uint *QT_FASTCALL fetchARGB32ToARGB32PM_neon(uint *buffer, const uchar *src, int index, int count,
6971 const QVector<QRgb> *, QDitherInfo *);
6972 extern const uint *QT_FASTCALL fetchRGBA8888ToARGB32PM_neon(uint *buffer, const uchar *src, int index, int count,
6973 const QVector<QRgb> *, QDitherInfo *);
6974 extern const QRgba64 * QT_FASTCALL convertARGB32ToRGBA64PM_neon(QRgba64 *buffer, const uint *src, int count,
6975 const QVector<QRgb> *, QDitherInfo *);
6976 extern const QRgba64 * QT_FASTCALL convertRGBA8888ToRGBA64PM_neon(QRgba64 *buffer, const uint *src, int count,
6977 const QVector<QRgb> *, QDitherInfo *);
6978 extern const QRgba64 *QT_FASTCALL fetchARGB32ToRGBA64PM_neon(QRgba64 *buffer, const uchar *src, int index, int count,
6979 const QVector<QRgb> *, QDitherInfo *);
6980 extern const QRgba64 *QT_FASTCALL fetchRGBA8888ToRGBA64PM_neon(QRgba64 *buffer, const uchar *src, int index, int count,
6981 const QVector<QRgb> *, QDitherInfo *);
6982 extern void QT_FASTCALL storeARGB32FromARGB32PM_neon(uchar *dest, const uint *src, int index, int count,
6983 const QVector<QRgb> *, QDitherInfo *);
6984 extern void QT_FASTCALL storeRGBA8888FromARGB32PM_neon(uchar *dest, const uint *src, int index, int count,
6985 const QVector<QRgb> *, QDitherInfo *);
6986 extern void QT_FASTCALL storeRGBXFromARGB32PM_neon(uchar *dest, const uint *src, int index, int count,
6987 const QVector<QRgb> *, QDitherInfo *);
6988 qPixelLayouts[QImage::Format_ARGB32].fetchToARGB32PM = fetchARGB32ToARGB32PM_neon;
6989 qPixelLayouts[QImage::Format_ARGB32].convertToARGB32PM = convertARGB32ToARGB32PM_neon;
6990 qPixelLayouts[QImage::Format_ARGB32].storeFromARGB32PM = storeARGB32FromARGB32PM_neon;
6991 qPixelLayouts[QImage::Format_ARGB32].fetchToRGBA64PM = fetchARGB32ToRGBA64PM_neon;
6992 qPixelLayouts[QImage::Format_ARGB32].convertToRGBA64PM = convertARGB32ToRGBA64PM_neon;
6993 qPixelLayouts[QImage::Format_RGBA8888].fetchToARGB32PM = fetchRGBA8888ToARGB32PM_neon;
6994 qPixelLayouts[QImage::Format_RGBA8888].convertToARGB32PM = convertRGBA8888ToARGB32PM_neon;
6995 qPixelLayouts[QImage::Format_RGBA8888].storeFromARGB32PM = storeRGBA8888FromARGB32PM_neon;
6996 qPixelLayouts[QImage::Format_RGBA8888].fetchToRGBA64PM = fetchRGBA8888ToRGBA64PM_neon;
6997 qPixelLayouts[QImage::Format_RGBA8888].convertToRGBA64PM = convertRGBA8888ToRGBA64PM_neon;
6998 qPixelLayouts[QImage::Format_RGBX8888].storeFromARGB32PM = storeRGBXFromARGB32PM_neon;
6999 qPixelLayouts[QImage::Format_RGBX8888].fetchToRGBA64PM = fetchRGBA8888ToRGBA64PM_neon;
7000 qPixelLayouts[QImage::Format_RGBX8888].convertToRGBA64PM = convertRGBA8888ToRGBA64PM_neon;
7001#endif
7002
7003#if defined(ENABLE_PIXMAN_DRAWHELPERS)
7004 // The RGB16 helpers are using Arm32 assemblythat has not been ported to AArch64
7005 qBlendFunctions[QImage::Format_RGB16][QImage::Format_ARGB32_Premultiplied] = qt_blend_argb32_on_rgb16_neon;
7006 qBlendFunctions[QImage::Format_ARGB32_Premultiplied][QImage::Format_RGB16] = qt_blend_rgb16_on_argb32_neon;
7007 qBlendFunctions[QImage::Format_RGB16][QImage::Format_RGB16] = qt_blend_rgb16_on_rgb16_neon;
7008
7009 qScaleFunctions[QImage::Format_RGB16][QImage::Format_ARGB32_Premultiplied] = qt_scale_image_argb32_on_rgb16_neon;
7010 qScaleFunctions[QImage::Format_RGB16][QImage::Format_RGB16] = qt_scale_image_rgb16_on_rgb16_neon;
7011
7012 qTransformFunctions[QImage::Format_RGB16][QImage::Format_ARGB32_Premultiplied] = qt_transform_image_argb32_on_rgb16_neon;
7013 qTransformFunctions[QImage::Format_RGB16][QImage::Format_RGB16] = qt_transform_image_rgb16_on_rgb16_neon;
7014
7015 qDrawHelper[QImage::Format_RGB16].alphamapBlit = qt_alphamapblit_quint16_neon;
7016
7017 destFetchProc[QImage::Format_RGB16] = qt_destFetchRGB16_neon;
7018 destStoreProc[QImage::Format_RGB16] = qt_destStoreRGB16_neon;
7019
7020 qMemRotateFunctions[QPixelLayout::BPP16][0] = qt_memrotate90_16_neon;
7021 qMemRotateFunctions[QPixelLayout::BPP16][2] = qt_memrotate270_16_neon;
7022#endif
7023#endif // defined(__ARM_NEON__)
7024
7025#if defined(__MIPS_DSP__)
7026 // Composition functions are all DSP r1
7027 qt_functionForMode_C[QPainter::CompositionMode_SourceOver] = comp_func_SourceOver_asm_mips_dsp;
7028 qt_functionForMode_C[QPainter::CompositionMode_Source] = comp_func_Source_mips_dsp;
7029 qt_functionForMode_C[QPainter::CompositionMode_DestinationOver] = comp_func_DestinationOver_mips_dsp;
7030 qt_functionForMode_C[QPainter::CompositionMode_SourceIn] = comp_func_SourceIn_mips_dsp;
7031 qt_functionForMode_C[QPainter::CompositionMode_DestinationIn] = comp_func_DestinationIn_mips_dsp;
7032 qt_functionForMode_C[QPainter::CompositionMode_DestinationOut] = comp_func_DestinationOut_mips_dsp;
7033 qt_functionForMode_C[QPainter::CompositionMode_SourceAtop] = comp_func_SourceAtop_mips_dsp;
7034 qt_functionForMode_C[QPainter::CompositionMode_DestinationAtop] = comp_func_DestinationAtop_mips_dsp;
7035 qt_functionForMode_C[QPainter::CompositionMode_Xor] = comp_func_XOR_mips_dsp;
7036 qt_functionForMode_C[QPainter::CompositionMode_SourceOut] = comp_func_SourceOut_mips_dsp;
7037
7038 qt_functionForModeSolid_C[QPainter::CompositionMode_SourceOver] = comp_func_solid_SourceOver_mips_dsp;
7039 qt_functionForModeSolid_C[QPainter::CompositionMode_DestinationOver] = comp_func_solid_DestinationOver_mips_dsp;
7040 qt_functionForModeSolid_C[QPainter::CompositionMode_SourceIn] = comp_func_solid_SourceIn_mips_dsp;
7041 qt_functionForModeSolid_C[QPainter::CompositionMode_DestinationIn] = comp_func_solid_DestinationIn_mips_dsp;
7042 qt_functionForModeSolid_C[QPainter::CompositionMode_SourceAtop] = comp_func_solid_SourceAtop_mips_dsp;
7043 qt_functionForModeSolid_C[QPainter::CompositionMode_DestinationAtop] = comp_func_solid_DestinationAtop_mips_dsp;
7044 qt_functionForModeSolid_C[QPainter::CompositionMode_Xor] = comp_func_solid_XOR_mips_dsp;
7045 qt_functionForModeSolid_C[QPainter::CompositionMode_SourceOut] = comp_func_solid_SourceOut_mips_dsp;
7046
7047 qBlendFunctions[QImage::Format_RGB32][QImage::Format_RGB32] = qt_blend_rgb32_on_rgb32_mips_dsp;
7048 qBlendFunctions[QImage::Format_ARGB32_Premultiplied][QImage::Format_RGB32] = qt_blend_rgb32_on_rgb32_mips_dsp;
7049 qBlendFunctions[QImage::Format_RGB32][QImage::Format_ARGB32_Premultiplied] = qt_blend_argb32_on_argb32_mips_dsp;
7050 qBlendFunctions[QImage::Format_ARGB32_Premultiplied][QImage::Format_ARGB32_Premultiplied] = qt_blend_argb32_on_argb32_mips_dsp;
7051
7052 destFetchProc[QImage::Format_ARGB32] = qt_destFetchARGB32_mips_dsp;
7053
7054 destStoreProc[QImage::Format_ARGB32] = qt_destStoreARGB32_mips_dsp;
7055
7056 sourceFetchUntransformed[QImage::Format_RGB888] = qt_fetchUntransformed_888_mips_dsp;
7057 sourceFetchUntransformed[QImage::Format_RGB444] = qt_fetchUntransformed_444_mips_dsp;
7058 sourceFetchUntransformed[QImage::Format_ARGB8565_Premultiplied] = qt_fetchUntransformed_argb8565_premultiplied_mips_dsp;
7059
7060#if defined(__MIPS_DSPR2__)
7061 qBlendFunctions[QImage::Format_RGB16][QImage::Format_RGB16] = qt_blend_rgb16_on_rgb16_mips_dspr2;
7062 sourceFetchUntransformed[QImage::Format_RGB16] = qt_fetchUntransformedRGB16_mips_dspr2;
7063#else
7064 qBlendFunctions[QImage::Format_RGB16][QImage::Format_RGB16] = qt_blend_rgb16_on_rgb16_mips_dsp;
7065#endif // defined(__MIPS_DSPR2__)
7066#endif // defined(__MIPS_DSP__)
7067}
7068
7069// Ensure initialization if this object file is linked.
7070Q_CONSTRUCTOR_FUNCTION(qInitDrawhelperFunctions);
7071
7072QT_END_NAMESPACE
7073

source code of qtbase/src/gui/painting/qdrawhelper.cpp