1// Copyright (C) 2022 The Qt Company Ltd.
2// Copyright (C) 2018 Intel Corporation.
3// SPDX-License-Identifier: LicenseRef-Qt-Commercial OR LGPL-3.0-only OR GPL-2.0-only OR GPL-3.0-only
4
5#include "qdrawhelper_p.h"
6
7#include <qstylehints.h>
8#include <qguiapplication.h>
9#include <qatomic.h>
10#include <private/qcolortransform_p.h>
11#include <private/qcolortrclut_p.h>
12#include <private/qdrawhelper_p.h>
13#include <private/qdrawhelper_x86_p.h>
14#include <private/qdrawingprimitive_sse2_p.h>
15#include <private/qdrawhelper_neon_p.h>
16#if defined(QT_COMPILER_SUPPORTS_MIPS_DSP) || defined(QT_COMPILER_SUPPORTS_MIPS_DSPR2)
17#include <private/qdrawhelper_mips_dsp_p.h>
18#endif
19#include <private/qguiapplication_p.h>
20#include <private/qpaintengine_raster_p.h>
21#include <private/qpainter_p.h>
22#include <private/qpixellayout_p.h>
23#include <private/qrgba64_p.h>
24#include <qendian.h>
25#include <qloggingcategory.h>
26#include <qmath.h>
27
28#if QT_CONFIG(thread) && !defined(Q_OS_WASM)
29#define QT_USE_THREAD_PARALLEL_FILLS
30#endif
31
32#if defined(QT_USE_THREAD_PARALLEL_FILLS)
33#include <qsemaphore.h>
34#include <qthreadpool.h>
35#include <private/qthreadpool_p.h>
36#endif
37
38QT_BEGIN_NAMESPACE
39
40Q_LOGGING_CATEGORY(lcQtGuiDrawHelper, "qt.gui.drawhelper")
41
// Replaces \a src with BYTE_MUL(src, a).
#define MASK(src, a) src = BYTE_MUL(src, a)

/*
  Fixed-point constants

  Texture coordinates in this file are stepped as integers carrying
  16 fractional bits: fixed_scale is the value 1.0 in that representation
  and half_point is half of it.
*/

constexpr int fixed_scale = 0x10000;
constexpr int half_point = fixed_scale / 2;
50
51template <QPixelLayout::BPP bpp> static
52inline uint QT_FASTCALL fetch1Pixel(const uchar *, int)
53{
54 Q_UNREACHABLE_RETURN(0);
55}
56
57template <>
58inline uint QT_FASTCALL fetch1Pixel<QPixelLayout::BPP1LSB>(const uchar *src, int index)
59{
60 return (src[index >> 3] >> (index & 7)) & 1;
61}
62
63template <>
64inline uint QT_FASTCALL fetch1Pixel<QPixelLayout::BPP1MSB>(const uchar *src, int index)
65{
66 return (src[index >> 3] >> (~index & 7)) & 1;
67}
68
69template <>
70inline uint QT_FASTCALL fetch1Pixel<QPixelLayout::BPP8>(const uchar *src, int index)
71{
72 return src[index];
73}
74
75template <>
76inline uint QT_FASTCALL fetch1Pixel<QPixelLayout::BPP16>(const uchar *src, int index)
77{
78 return reinterpret_cast<const quint16 *>(src)[index];
79}
80
81template <>
82inline uint QT_FASTCALL fetch1Pixel<QPixelLayout::BPP24>(const uchar *src, int index)
83{
84 return reinterpret_cast<const quint24 *>(src)[index];
85}
86
87template <>
88inline uint QT_FASTCALL fetch1Pixel<QPixelLayout::BPP32>(const uchar *src, int index)
89{
90 return reinterpret_cast<const uint *>(src)[index];
91}
92
93template <>
94inline uint QT_FASTCALL fetch1Pixel<QPixelLayout::BPP64>(const uchar *src, int index)
95{
96 // We have to do the conversion in fetch to fit into a 32bit uint
97 QRgba64 c = reinterpret_cast<const QRgba64 *>(src)[index];
98 return c.toArgb32();
99}
100
101template <>
102inline uint QT_FASTCALL fetch1Pixel<QPixelLayout::BPP16FPx4>(const uchar *src, int index)
103{
104 // We have to do the conversion in fetch to fit into a 32bit uint
105 QRgbaFloat16 c = reinterpret_cast<const QRgbaFloat16 *>(src)[index];
106 return c.toArgb32();
107}
108
109template <>
110inline uint QT_FASTCALL fetch1Pixel<QPixelLayout::BPP32FPx4>(const uchar *src, int index)
111{
112 // We have to do the conversion in fetch to fit into a 32bit uint
113 QRgbaFloat32 c = reinterpret_cast<const QRgbaFloat32 *>(src)[index];
114 return c.toArgb32();
115}
116
117typedef uint (QT_FASTCALL *Fetch1PixelFunc)(const uchar *src, int index);
118
119constexpr Fetch1PixelFunc fetch1PixelTable[QPixelLayout::BPPCount] = {
120 nullptr, // BPPNone
121 fetch1Pixel<QPixelLayout::BPP1MSB>,
122 fetch1Pixel<QPixelLayout::BPP1LSB>,
123 fetch1Pixel<QPixelLayout::BPP8>,
124 fetch1Pixel<QPixelLayout::BPP16>,
125 fetch1Pixel<QPixelLayout::BPP24>,
126 fetch1Pixel<QPixelLayout::BPP32>,
127 fetch1Pixel<QPixelLayout::BPP64>,
128 fetch1Pixel<QPixelLayout::BPP16FPx4>,
129 fetch1Pixel<QPixelLayout::BPP32FPx4>,
130};
131
132#if QT_CONFIG(raster_64bit)
133static void QT_FASTCALL convertRGBA64ToRGBA64PM(QRgba64 *buffer, int count)
134{
135 for (int i = 0; i < count; ++i)
136 buffer[i] = buffer[i].premultiplied();
137}
138
139static void QT_FASTCALL convertRGBA64PMToRGBA64PM(QRgba64 *, int)
140{
141}
142
143static void QT_FASTCALL convertRGBA16FToRGBA64PM(QRgba64 *buffer, int count)
144{
145 const QRgbaFloat16 *in = reinterpret_cast<const QRgbaFloat16 *>(buffer);
146 for (int i = 0; i < count; ++i) {
147 QRgbaFloat16 c = in[i];
148 buffer[i] = QRgba64::fromRgba64(red: c.red16(), green: c.green16(), blue: c.blue16(), alpha: c.alpha16()).premultiplied();
149 }
150}
151
152static void QT_FASTCALL convertRGBA16FPMToRGBA64PM(QRgba64 *buffer, int count)
153{
154 const QRgbaFloat16 *in = reinterpret_cast<const QRgbaFloat16 *>(buffer);
155 for (int i = 0; i < count; ++i) {
156 QRgbaFloat16 c = in[i];
157 buffer[i] = QRgba64::fromRgba64(red: c.red16(), green: c.green16(), blue: c.blue16(), alpha: c.alpha16());
158 }
159}
160
161static void QT_FASTCALL convertRGBA32FToRGBA64PM(QRgba64 *buffer, int count)
162{
163 const QRgbaFloat32 *in = reinterpret_cast<const QRgbaFloat32 *>(buffer);
164 for (int i = 0; i < count; ++i) {
165 QRgbaFloat32 c = in[i];
166 buffer[i] = QRgba64::fromRgba64(red: c.red16(), green: c.green16(), blue: c.blue16(), alpha: c.alpha16()).premultiplied();
167 }
168}
169
170static void QT_FASTCALL convertRGBA32FPMToRGBA64PM(QRgba64 *buffer, int count)
171{
172 const QRgbaFloat32 *in = reinterpret_cast<const QRgbaFloat32 *>(buffer);
173 for (int i = 0; i < count; ++i) {
174 QRgbaFloat32 c = in[i];
175 buffer[i] = QRgba64::fromRgba64(red: c.red16(), green: c.green16(), blue: c.blue16(), alpha: c.alpha16());
176 }
177}
178
179static Convert64Func convert64ToRGBA64PM[QImage::NImageFormats] = {
180 nullptr,
181 nullptr,
182 nullptr,
183 nullptr,
184 nullptr,
185 nullptr,
186 nullptr,
187 nullptr,
188 nullptr,
189 nullptr,
190 nullptr,
191 nullptr,
192 nullptr,
193 nullptr,
194 nullptr,
195 nullptr,
196 nullptr,
197 nullptr,
198 nullptr,
199 nullptr,
200 nullptr,
201 nullptr,
202 nullptr,
203 nullptr,
204 nullptr,
205 convertRGBA64PMToRGBA64PM,
206 convertRGBA64ToRGBA64PM,
207 convertRGBA64PMToRGBA64PM,
208 nullptr,
209 nullptr,
210 convertRGBA16FPMToRGBA64PM,
211 convertRGBA16FToRGBA64PM,
212 convertRGBA16FPMToRGBA64PM,
213 convertRGBA32FPMToRGBA64PM,
214 convertRGBA32FToRGBA64PM,
215 convertRGBA32FPMToRGBA64PM,
216};
217#endif
218
219#if QT_CONFIG(raster_fp)
220static void QT_FASTCALL convertRGBA64PMToRGBA32F(QRgbaFloat32 *buffer, const quint64 *src, int count)
221{
222 const auto *in = reinterpret_cast<const QRgba64 *>(src);
223 for (int i = 0; i < count; ++i) {
224 auto c = in[i];
225 buffer[i] = QRgbaFloat32::fromRgba64(red: c.red(), green: c.green(), blue: c.blue(), alpha: c.alpha()).premultiplied();
226 }
227}
228
229static void QT_FASTCALL convertRGBA64ToRGBA32F(QRgbaFloat32 *buffer, const quint64 *src, int count)
230{
231 const auto *in = reinterpret_cast<const QRgba64 *>(src);
232 for (int i = 0; i < count; ++i) {
233 auto c = in[i];
234 buffer[i] = QRgbaFloat32::fromRgba64(red: c.red(), green: c.green(), blue: c.blue(), alpha: c.alpha());
235 }
236}
237
238static void QT_FASTCALL convertRGBA16FPMToRGBA32F(QRgbaFloat32 *buffer, const quint64 *src, int count)
239{
240 qFloatFromFloat16((float *)buffer, (const qfloat16 *)src, length: count * 4);
241 for (int i = 0; i < count; ++i)
242 buffer[i] = buffer[i].premultiplied();
243}
244
245static void QT_FASTCALL convertRGBA16FToRGBA32F(QRgbaFloat32 *buffer, const quint64 *src, int count)
246{
247 qFloatFromFloat16((float *)buffer, (const qfloat16 *)src, length: count * 4);
248}
249
250static Convert64ToFPFunc convert64ToRGBA32F[QImage::NImageFormats] = {
251 nullptr,
252 nullptr,
253 nullptr,
254 nullptr,
255 nullptr,
256 nullptr,
257 nullptr,
258 nullptr,
259 nullptr,
260 nullptr,
261 nullptr,
262 nullptr,
263 nullptr,
264 nullptr,
265 nullptr,
266 nullptr,
267 nullptr,
268 nullptr,
269 nullptr,
270 nullptr,
271 nullptr,
272 nullptr,
273 nullptr,
274 nullptr,
275 nullptr,
276 convertRGBA64ToRGBA32F,
277 convertRGBA64PMToRGBA32F,
278 convertRGBA64ToRGBA32F,
279 nullptr,
280 nullptr,
281 convertRGBA16FToRGBA32F,
282 convertRGBA16FPMToRGBA32F,
283 convertRGBA16FToRGBA32F,
284 nullptr,
285 nullptr,
286 nullptr,
287};
288
289static void convertRGBA32FToRGBA32FPM(QRgbaFloat32 *buffer, int count)
290{
291 for (int i = 0; i < count; ++i)
292 buffer[i] = buffer[i].premultiplied();
293}
294
295static void convertRGBA32FToRGBA32F(QRgbaFloat32 *, int)
296{
297}
298
299#endif
300
301/*
302 Destination fetch. This is simple as we don't have to do bounds checks or
303 transformations
304*/
305
306static uint * QT_FASTCALL destFetchMono(uint *buffer, QRasterBuffer *rasterBuffer, int x, int y, int length)
307{
308 uchar *Q_DECL_RESTRICT data = (uchar *)rasterBuffer->scanLine(y);
309 uint *start = buffer;
310 const uint *end = buffer + length;
311 while (buffer < end) {
312 *buffer = data[x>>3] & (0x80 >> (x & 7)) ? rasterBuffer->destColor1 : rasterBuffer->destColor0;
313 ++buffer;
314 ++x;
315 }
316 return start;
317}
318
319static uint * QT_FASTCALL destFetchMonoLsb(uint *buffer, QRasterBuffer *rasterBuffer, int x, int y, int length)
320{
321 uchar *Q_DECL_RESTRICT data = (uchar *)rasterBuffer->scanLine(y);
322 uint *start = buffer;
323 const uint *end = buffer + length;
324 while (buffer < end) {
325 *buffer = data[x>>3] & (0x1 << (x & 7)) ? rasterBuffer->destColor1 : rasterBuffer->destColor0;
326 ++buffer;
327 ++x;
328 }
329 return start;
330}
331
332static uint * QT_FASTCALL destFetchARGB32P(uint *, QRasterBuffer *rasterBuffer, int x, int y, int)
333{
334 return (uint *)rasterBuffer->scanLine(y) + x;
335}
336
337static uint * QT_FASTCALL destFetchRGB16(uint *buffer, QRasterBuffer *rasterBuffer, int x, int y, int length)
338{
339 const ushort *Q_DECL_RESTRICT data = (const ushort *)rasterBuffer->scanLine(y) + x;
340 for (int i = 0; i < length; ++i)
341 buffer[i] = qConvertRgb16To32(c: data[i]);
342 return buffer;
343}
344
345static uint *QT_FASTCALL destFetch(uint *buffer, QRasterBuffer *rasterBuffer, int x, int y, int length)
346{
347 const QPixelLayout *layout = &qPixelLayouts[rasterBuffer->format];
348 return const_cast<uint *>(layout->fetchToARGB32PM(buffer, rasterBuffer->scanLine(y), x, length, nullptr, nullptr));
349}
350
351static uint *QT_FASTCALL destFetchUndefined(uint *buffer, QRasterBuffer *, int, int, int)
352{
353 return buffer;
354}
355
356static DestFetchProc destFetchProc[QImage::NImageFormats] =
357{
358 nullptr, // Format_Invalid
359 destFetchMono, // Format_Mono,
360 destFetchMonoLsb, // Format_MonoLSB
361 nullptr, // Format_Indexed8
362 destFetchARGB32P, // Format_RGB32
363 destFetch, // Format_ARGB32,
364 destFetchARGB32P, // Format_ARGB32_Premultiplied
365 destFetchRGB16, // Format_RGB16
366 destFetch, // Format_ARGB8565_Premultiplied
367 destFetch, // Format_RGB666
368 destFetch, // Format_ARGB6666_Premultiplied
369 destFetch, // Format_RGB555
370 destFetch, // Format_ARGB8555_Premultiplied
371 destFetch, // Format_RGB888
372 destFetch, // Format_RGB444
373 destFetch, // Format_ARGB4444_Premultiplied
374 destFetch, // Format_RGBX8888
375 destFetch, // Format_RGBA8888
376 destFetch, // Format_RGBA8888_Premultiplied
377 destFetch, // Format_BGR30
378 destFetch, // Format_A2BGR30_Premultiplied
379 destFetch, // Format_RGB30
380 destFetch, // Format_A2RGB30_Premultiplied
381 destFetch, // Format_Alpha8
382 destFetch, // Format_Grayscale8
383 destFetch, // Format_RGBX64
384 destFetch, // Format_RGBA64
385 destFetch, // Format_RGBA64_Premultiplied
386 destFetch, // Format_Grayscale16
387 destFetch, // Format_BGR888
388 destFetch, // Format_RGBX16FPx4
389 destFetch, // Format_RGBA16FPx4
390 destFetch, // Format_RGBA16FPx4_Premultiplied
391 destFetch, // Format_RGBX32FPx4
392 destFetch, // Format_RGBA32FPx4
393 destFetch, // Format_RGBA32FPx4_Premultiplied
394};
395
396#if QT_CONFIG(raster_64bit)
397static QRgba64 *QT_FASTCALL destFetch64(QRgba64 *buffer, QRasterBuffer *rasterBuffer, int x, int y, int length)
398{
399 const QPixelLayout *layout = &qPixelLayouts[rasterBuffer->format];
400 return const_cast<QRgba64 *>(layout->fetchToRGBA64PM(buffer, rasterBuffer->scanLine(y), x, length, nullptr, nullptr));
401}
402
403static QRgba64 * QT_FASTCALL destFetchRGB64(QRgba64 *, QRasterBuffer *rasterBuffer, int x, int y, int)
404{
405 return (QRgba64 *)rasterBuffer->scanLine(y) + x;
406}
407
408static QRgba64 * QT_FASTCALL destFetch64Undefined(QRgba64 *buffer, QRasterBuffer *, int, int, int)
409{
410 return buffer;
411}
412
413static DestFetchProc64 destFetchProc64[QImage::NImageFormats] =
414{
415 nullptr, // Format_Invalid
416 nullptr, // Format_Mono,
417 nullptr, // Format_MonoLSB
418 nullptr, // Format_Indexed8
419 destFetch64, // Format_RGB32
420 destFetch64, // Format_ARGB32,
421 destFetch64, // Format_ARGB32_Premultiplied
422 destFetch64, // Format_RGB16
423 destFetch64, // Format_ARGB8565_Premultiplied
424 destFetch64, // Format_RGB666
425 destFetch64, // Format_ARGB6666_Premultiplied
426 destFetch64, // Format_RGB555
427 destFetch64, // Format_ARGB8555_Premultiplied
428 destFetch64, // Format_RGB888
429 destFetch64, // Format_RGB444
430 destFetch64, // Format_ARGB4444_Premultiplied
431 destFetch64, // Format_RGBX8888
432 destFetch64, // Format_RGBA8888
433 destFetch64, // Format_RGBA8888_Premultiplied
434 destFetch64, // Format_BGR30
435 destFetch64, // Format_A2BGR30_Premultiplied
436 destFetch64, // Format_RGB30
437 destFetch64, // Format_A2RGB30_Premultiplied
438 destFetch64, // Format_Alpha8
439 destFetch64, // Format_Grayscale8
440 destFetchRGB64, // Format_RGBX64
441 destFetch64, // Format_RGBA64
442 destFetchRGB64, // Format_RGBA64_Premultiplied
443 destFetch64, // Format_Grayscale16
444 destFetch64, // Format_BGR888
445 destFetch64, // Format_RGBX16FPx4
446 destFetch64, // Format_RGBA16FPx4
447 destFetch64, // Format_RGBA16FPx4_Premultiplied
448 destFetch64, // Format_RGBX32FPx4
449 destFetch64, // Format_RGBA32FPx4
450 destFetch64, // Format_RGBA32FPx4_Premultiplied
451};
452#endif
453
454#if QT_CONFIG(raster_fp)
455static QRgbaFloat32 *QT_FASTCALL destFetchFP(QRgbaFloat32 *buffer, QRasterBuffer *rasterBuffer, int x, int y, int length)
456{
457 return const_cast<QRgbaFloat32 *>(qFetchToRGBA32F[rasterBuffer->format](buffer, rasterBuffer->scanLine(y), x, length, nullptr, nullptr));
458}
459
460static QRgbaFloat32 *QT_FASTCALL destFetchRGBFP(QRgbaFloat32 *, QRasterBuffer *rasterBuffer, int x, int y, int)
461{
462 return reinterpret_cast<QRgbaFloat32 *>(rasterBuffer->scanLine(y)) + x;
463}
464
465static QRgbaFloat32 *QT_FASTCALL destFetchFPUndefined(QRgbaFloat32 *buffer, QRasterBuffer *, int, int, int)
466{
467 return buffer;
468}
469static DestFetchProcFP destFetchProcFP[QImage::NImageFormats] =
470{
471 nullptr, // Format_Invalid
472 nullptr, // Format_Mono,
473 nullptr, // Format_MonoLSB
474 nullptr, // Format_Indexed8
475 destFetchFP, // Format_RGB32
476 destFetchFP, // Format_ARGB32,
477 destFetchFP, // Format_ARGB32_Premultiplied
478 destFetchFP, // Format_RGB16
479 destFetchFP, // Format_ARGB8565_Premultiplied
480 destFetchFP, // Format_RGB666
481 destFetchFP, // Format_ARGB6666_Premultiplied
482 destFetchFP, // Format_RGB555
483 destFetchFP, // Format_ARGB8555_Premultiplied
484 destFetchFP, // Format_RGB888
485 destFetchFP, // Format_RGB444
486 destFetchFP, // Format_ARGB4444_Premultiplied
487 destFetchFP, // Format_RGBX8888
488 destFetchFP, // Format_RGBA8888
489 destFetchFP, // Format_RGBA8888_Premultiplied
490 destFetchFP, // Format_BGR30
491 destFetchFP, // Format_A2BGR30_Premultiplied
492 destFetchFP, // Format_RGB30
493 destFetchFP, // Format_A2RGB30_Premultiplied
494 destFetchFP, // Format_Alpha8
495 destFetchFP, // Format_Grayscale8
496 destFetchFP, // Format_RGBX64
497 destFetchFP, // Format_RGBA64
498 destFetchFP, // Format_RGBA64_Premultiplied
499 destFetchFP, // Format_Grayscale16
500 destFetchFP, // Format_BGR888
501 destFetchFP, // Format_RGBX16FPx4
502 destFetchFP, // Format_RGBA16FPx4
503 destFetchFP, // Format_RGBA16FPx4_Premultiplied
504 destFetchRGBFP, // Format_RGBX32FPx4
505 destFetchFP, // Format_RGBA32FPx4
506 destFetchRGBFP, // Format_RGBA32FPx4_Premultiplied
507};
508#endif
509
510/*
511 Returns the color in the mono destination color table
512 that is the "nearest" to /color/.
513*/
514static inline QRgb findNearestColor(QRgb color, QRasterBuffer *rbuf)
515{
516 QRgb color_0 = qPremultiply(x: rbuf->destColor0);
517 QRgb color_1 = qPremultiply(x: rbuf->destColor1);
518 color = qPremultiply(x: color);
519
520 int r = qRed(rgb: color);
521 int g = qGreen(rgb: color);
522 int b = qBlue(rgb: color);
523 int rx, gx, bx;
524 int dist_0, dist_1;
525
526 rx = r - qRed(rgb: color_0);
527 gx = g - qGreen(rgb: color_0);
528 bx = b - qBlue(rgb: color_0);
529 dist_0 = rx*rx + gx*gx + bx*bx;
530
531 rx = r - qRed(rgb: color_1);
532 gx = g - qGreen(rgb: color_1);
533 bx = b - qBlue(rgb: color_1);
534 dist_1 = rx*rx + gx*gx + bx*bx;
535
536 if (dist_0 < dist_1)
537 return color_0;
538 return color_1;
539}
540
541/*
542 Destination store.
543*/
544
545static void QT_FASTCALL destStoreMono(QRasterBuffer *rasterBuffer, int x, int y, const uint *buffer, int length)
546{
547 uchar *Q_DECL_RESTRICT data = (uchar *)rasterBuffer->scanLine(y);
548 if (rasterBuffer->monoDestinationWithClut) {
549 for (int i = 0; i < length; ++i) {
550 if (buffer[i] == rasterBuffer->destColor0) {
551 data[x >> 3] &= ~(0x80 >> (x & 7));
552 } else if (buffer[i] == rasterBuffer->destColor1) {
553 data[x >> 3] |= 0x80 >> (x & 7);
554 } else if (findNearestColor(color: buffer[i], rbuf: rasterBuffer) == rasterBuffer->destColor0) {
555 data[x >> 3] &= ~(0x80 >> (x & 7));
556 } else {
557 data[x >> 3] |= 0x80 >> (x & 7);
558 }
559 ++x;
560 }
561 } else {
562 for (int i = 0; i < length; ++i) {
563 if (qGray(rgb: buffer[i]) < int(qt_bayer_matrix[y & 15][x & 15]))
564 data[x >> 3] |= 0x80 >> (x & 7);
565 else
566 data[x >> 3] &= ~(0x80 >> (x & 7));
567 ++x;
568 }
569 }
570}
571
572static void QT_FASTCALL destStoreMonoLsb(QRasterBuffer *rasterBuffer, int x, int y, const uint *buffer, int length)
573{
574 uchar *Q_DECL_RESTRICT data = (uchar *)rasterBuffer->scanLine(y);
575 if (rasterBuffer->monoDestinationWithClut) {
576 for (int i = 0; i < length; ++i) {
577 if (buffer[i] == rasterBuffer->destColor0) {
578 data[x >> 3] &= ~(1 << (x & 7));
579 } else if (buffer[i] == rasterBuffer->destColor1) {
580 data[x >> 3] |= 1 << (x & 7);
581 } else if (findNearestColor(color: buffer[i], rbuf: rasterBuffer) == rasterBuffer->destColor0) {
582 data[x >> 3] &= ~(1 << (x & 7));
583 } else {
584 data[x >> 3] |= 1 << (x & 7);
585 }
586 ++x;
587 }
588 } else {
589 for (int i = 0; i < length; ++i) {
590 if (qGray(rgb: buffer[i]) < int(qt_bayer_matrix[y & 15][x & 15]))
591 data[x >> 3] |= 1 << (x & 7);
592 else
593 data[x >> 3] &= ~(1 << (x & 7));
594 ++x;
595 }
596 }
597}
598
599static void QT_FASTCALL destStoreRGB16(QRasterBuffer *rasterBuffer, int x, int y, const uint *buffer, int length)
600{
601 quint16 *data = (quint16*)rasterBuffer->scanLine(y) + x;
602 for (int i = 0; i < length; ++i)
603 data[i] = qConvertRgb32To16(c: buffer[i]);
604}
605
606static void QT_FASTCALL destStore(QRasterBuffer *rasterBuffer, int x, int y, const uint *buffer, int length)
607{
608 const QPixelLayout *layout = &qPixelLayouts[rasterBuffer->format];
609 ConvertAndStorePixelsFunc store = layout->storeFromARGB32PM;
610 if (!layout->premultiplied && !layout->hasAlphaChannel)
611 store = layout->storeFromRGB32;
612 uchar *dest = rasterBuffer->scanLine(y);
613 store(dest, buffer, x, length, nullptr, nullptr);
614}
615
616static void QT_FASTCALL destStoreGray8(QRasterBuffer *rasterBuffer, int x, int y, const uint *buffer, int length)
617{
618 uchar *data = rasterBuffer->scanLine(y) + x;
619
620 bool failed = false;
621 for (int k = 0; k < length; ++k) {
622 if (!qIsGray(rgb: buffer[k])) {
623 failed = true;
624 break;
625 }
626 data[k] = qRed(rgb: buffer[k]);
627 }
628 if (failed) { // Non-gray colors
629 QColorSpace fromCS = rasterBuffer->colorSpace.isValid() ? rasterBuffer->colorSpace : QColorSpace::SRgb;
630 QColorTransform tf = QColorSpacePrivate::get(colorSpace&: fromCS)->transformationToXYZ();
631 QColorTransformPrivate *tfd = QColorTransformPrivate::get(q: tf);
632
633 tfd->apply(dst: data, src: buffer, count: length, flags: QColorTransformPrivate::InputPremultiplied);
634 }
635}
636
637static void QT_FASTCALL destStoreGray16(QRasterBuffer *rasterBuffer, int x, int y, const uint *buffer, int length)
638{
639 quint16 *data = reinterpret_cast<quint16 *>(rasterBuffer->scanLine(y)) + x;
640
641 bool failed = false;
642 for (int k = 0; k < length; ++k) {
643 if (!qIsGray(rgb: buffer[k])) {
644 failed = true;
645 break;
646 }
647 data[k] = qRed(rgb: buffer[k]) * 257;
648 }
649 if (failed) { // Non-gray colors
650 QColorSpace fromCS = rasterBuffer->colorSpace.isValid() ? rasterBuffer->colorSpace : QColorSpace::SRgb;
651 QColorTransform tf = QColorSpacePrivate::get(colorSpace&: fromCS)->transformationToXYZ();
652 QColorTransformPrivate *tfd = QColorTransformPrivate::get(q: tf);
653
654 QRgba64 tmp_line[BufferSize];
655 for (int k = 0; k < length; ++k)
656 tmp_line[k] = QRgba64::fromArgb32(rgb: buffer[k]);
657 tfd->apply(dst: data, src: tmp_line, count: length, flags: QColorTransformPrivate::InputPremultiplied);
658 }
659}
660
661static DestStoreProc destStoreProc[QImage::NImageFormats] =
662{
663 nullptr, // Format_Invalid
664 destStoreMono, // Format_Mono,
665 destStoreMonoLsb, // Format_MonoLSB
666 nullptr, // Format_Indexed8
667 nullptr, // Format_RGB32
668 destStore, // Format_ARGB32,
669 nullptr, // Format_ARGB32_Premultiplied
670 destStoreRGB16, // Format_RGB16
671 destStore, // Format_ARGB8565_Premultiplied
672 destStore, // Format_RGB666
673 destStore, // Format_ARGB6666_Premultiplied
674 destStore, // Format_RGB555
675 destStore, // Format_ARGB8555_Premultiplied
676 destStore, // Format_RGB888
677 destStore, // Format_RGB444
678 destStore, // Format_ARGB4444_Premultiplied
679 destStore, // Format_RGBX8888
680 destStore, // Format_RGBA8888
681 destStore, // Format_RGBA8888_Premultiplied
682 destStore, // Format_BGR30
683 destStore, // Format_A2BGR30_Premultiplied
684 destStore, // Format_RGB30
685 destStore, // Format_A2RGB30_Premultiplied
686 destStore, // Format_Alpha8
687 destStoreGray8, // Format_Grayscale8
688 destStore, // Format_RGBX64
689 destStore, // Format_RGBA64
690 destStore, // Format_RGBA64_Premultiplied
691 destStoreGray16, // Format_Grayscale16
692 destStore, // Format_BGR888
693 destStore, // Format_RGBX16FPx4
694 destStore, // Format_RGBA16FPx4
695 destStore, // Format_RGBA16FPx4_Premultiplied
696 destStore, // Format_RGBX32FPx4
697 destStore, // Format_RGBA32FPx4
698 destStore, // Format_RGBA32FPx4_Premultiplied
699};
700
701#if QT_CONFIG(raster_64bit)
702static void QT_FASTCALL destStore64(QRasterBuffer *rasterBuffer, int x, int y, const QRgba64 *buffer, int length)
703{
704 auto store = qStoreFromRGBA64PM[rasterBuffer->format];
705 uchar *dest = rasterBuffer->scanLine(y);
706 store(dest, buffer, x, length, nullptr, nullptr);
707}
708
709static void QT_FASTCALL destStore64RGBA64(QRasterBuffer *rasterBuffer, int x, int y, const QRgba64 *buffer, int length)
710{
711 QRgba64 *dest = reinterpret_cast<QRgba64*>(rasterBuffer->scanLine(y)) + x;
712 for (int i = 0; i < length; ++i) {
713 dest[i] = buffer[i].unpremultiplied();
714 }
715}
716
717static void QT_FASTCALL destStore64Gray8(QRasterBuffer *rasterBuffer, int x, int y, const QRgba64 *buffer, int length)
718{
719 uchar *data = rasterBuffer->scanLine(y) + x;
720
721 bool failed = false;
722 for (int k = 0; k < length; ++k) {
723 if (buffer[k].red() != buffer[k].green() || buffer[k].red() != buffer[k].blue()) {
724 failed = true;
725 break;
726 }
727 data[k] = buffer[k].red8();
728 }
729 if (failed) { // Non-gray colors
730 QColorSpace fromCS = rasterBuffer->colorSpace.isValid() ? rasterBuffer->colorSpace : QColorSpace::SRgb;
731 QColorTransform tf = QColorSpacePrivate::get(colorSpace&: fromCS)->transformationToXYZ();
732 QColorTransformPrivate *tfd = QColorTransformPrivate::get(q: tf);
733
734 quint16 gray_line[BufferSize];
735 tfd->apply(dst: gray_line, src: buffer, count: length, flags: QColorTransformPrivate::InputPremultiplied);
736 for (int k = 0; k < length; ++k)
737 data[k] = qt_div_257(x: gray_line[k]);
738 }
739}
740
741static void QT_FASTCALL destStore64Gray16(QRasterBuffer *rasterBuffer, int x, int y, const QRgba64 *buffer, int length)
742{
743 quint16 *data = reinterpret_cast<quint16 *>(rasterBuffer->scanLine(y)) + x;
744
745 bool failed = false;
746 for (int k = 0; k < length; ++k) {
747 if (buffer[k].red() != buffer[k].green() || buffer[k].red() != buffer[k].blue()) {
748 failed = true;
749 break;
750 }
751 data[k] = buffer[k].red();
752 }
753 if (failed) { // Non-gray colors
754 QColorSpace fromCS = rasterBuffer->colorSpace.isValid() ? rasterBuffer->colorSpace : QColorSpace::SRgb;
755 QColorTransform tf = QColorSpacePrivate::get(colorSpace&: fromCS)->transformationToXYZ();
756 QColorTransformPrivate *tfd = QColorTransformPrivate::get(q: tf);
757 tfd->apply(dst: data, src: buffer, count: length, flags: QColorTransformPrivate::InputPremultiplied);
758 }
759}
760
761static DestStoreProc64 destStoreProc64[QImage::NImageFormats] =
762{
763 nullptr, // Format_Invalid
764 nullptr, // Format_Mono,
765 nullptr, // Format_MonoLSB
766 nullptr, // Format_Indexed8
767 destStore64, // Format_RGB32
768 destStore64, // Format_ARGB32,
769 destStore64, // Format_ARGB32_Premultiplied
770 destStore64, // Format_RGB16
771 destStore64, // Format_ARGB8565_Premultiplied
772 destStore64, // Format_RGB666
773 destStore64, // Format_ARGB6666_Premultiplied
774 destStore64, // Format_RGB555
775 destStore64, // Format_ARGB8555_Premultiplied
776 destStore64, // Format_RGB888
777 destStore64, // Format_RGB444
778 destStore64, // Format_ARGB4444_Premultiplied
779 destStore64, // Format_RGBX8888
780 destStore64, // Format_RGBA8888
781 destStore64, // Format_RGBA8888_Premultiplied
782 destStore64, // Format_BGR30
783 destStore64, // Format_A2BGR30_Premultiplied
784 destStore64, // Format_RGB30
785 destStore64, // Format_A2RGB30_Premultiplied
786 destStore64, // Format_Alpha8
787 destStore64Gray8, // Format_Grayscale8
788 nullptr, // Format_RGBX64
789 destStore64RGBA64, // Format_RGBA64
790 nullptr, // Format_RGBA64_Premultiplied
791 destStore64Gray16, // Format_Grayscale16
792 destStore64, // Format_BGR888
793 destStore64, // Format_RGBX16FPx4
794 destStore64, // Format_RGBA16FPx4
795 destStore64, // Format_RGBA16FPx4_Premultiplied
796 destStore64, // Format_RGBX32FPx4
797 destStore64, // Format_RGBA32FPx4
798 destStore64, // Format_RGBA32FPx4_Premultiplied
799};
800#endif
801
802#if QT_CONFIG(raster_fp)
803static void QT_FASTCALL destStoreFP(QRasterBuffer *rasterBuffer, int x, int y, const QRgbaFloat32 *buffer, int length)
804{
805 auto store = qStoreFromRGBA32F[rasterBuffer->format];
806 uchar *dest = rasterBuffer->scanLine(y);
807 store(dest, buffer, x, length, nullptr, nullptr);
808}
809#endif
810
811/*
812 Source fetches
813
814 This is a bit more complicated, as we need several fetch routines for every surface type
815
816 We need 5 fetch methods per surface type:
817 untransformed
818 transformed (tiled and not tiled)
819 transformed bilinear (tiled and not tiled)
820
821 We don't need bounds checks for untransformed, but we need them for the other ones.
822
823 The generic implementation does pixel by pixel fetches
824*/
825
// Kinds of texture source fetch. Enumerator order is significant because
// NBlendTypes, the trailing entry, doubles as the number of kinds.
enum TextureBlendType {
    BlendUntransformed,             // untransformed
    BlendTiled,                     // untransformed, tiled
    BlendTransformed,               // transformed
    BlendTransformedTiled,          // transformed, tiled
    BlendTransformedBilinear,       // transformed bilinear
    BlendTransformedBilinearTiled,  // transformed bilinear, tiled
    NBlendTypes                     // count of the entries above
};
835
836static const uint *QT_FASTCALL fetchUntransformed(uint *buffer, const Operator *,
837 const QSpanData *data, int y, int x, int length)
838{
839 const QPixelLayout *layout = &qPixelLayouts[data->texture.format];
840 return layout->fetchToARGB32PM(buffer, data->texture.scanLine(y), x, length, data->texture.colorTable, nullptr);
841}
842
843static const uint *QT_FASTCALL fetchUntransformedARGB32PM(uint *, const Operator *,
844 const QSpanData *data, int y, int x, int)
845{
846 const uchar *scanLine = data->texture.scanLine(y);
847 return reinterpret_cast<const uint *>(scanLine) + x;
848}
849
850static const uint *QT_FASTCALL fetchUntransformedRGB16(uint *buffer, const Operator *,
851 const QSpanData *data, int y, int x,
852 int length)
853{
854 const quint16 *scanLine = (const quint16 *)data->texture.scanLine(y) + x;
855 for (int i = 0; i < length; ++i)
856 buffer[i] = qConvertRgb16To32(c: scanLine[i]);
857 return buffer;
858}
859
860#if QT_CONFIG(raster_64bit)
861static const QRgba64 *QT_FASTCALL fetchUntransformed64(QRgba64 *buffer, const Operator *,
862 const QSpanData *data, int y, int x, int length)
863{
864 const QPixelLayout *layout = &qPixelLayouts[data->texture.format];
865 return layout->fetchToRGBA64PM(buffer, data->texture.scanLine(y), x, length, data->texture.colorTable, nullptr);
866}
867
868static const QRgba64 *QT_FASTCALL fetchUntransformedRGBA64PM(QRgba64 *, const Operator *,
869 const QSpanData *data, int y, int x, int)
870{
871 const uchar *scanLine = data->texture.scanLine(y);
872 return reinterpret_cast<const QRgba64 *>(scanLine) + x;
873}
874#endif
875
876#if QT_CONFIG(raster_fp)
877static const QRgbaFloat32 *QT_FASTCALL fetchUntransformedFP(QRgbaFloat32 *buffer, const Operator *,
878 const QSpanData *data, int y, int x, int length)
879{
880 const auto fetch = qFetchToRGBA32F[data->texture.format];
881 return fetch(buffer, data->texture.scanLine(y), x, length, data->texture.colorTable, nullptr);
882}
883#endif
884
885template<TextureBlendType blendType>
886inline void fetchTransformed_pixelBounds(int max, int l1, int l2, int &v)
887{
888 static_assert(blendType == BlendTransformed || blendType == BlendTransformedTiled);
889 if (blendType == BlendTransformedTiled) {
890 if (v < 0 || v >= max) {
891 v %= max;
892 if (v < 0) v += max;
893 }
894 } else {
895 v = qBound(min: l1, val: v, max: l2);
896 }
897}
898
899static inline bool canUseFastMatrixPath(const qreal cx, const qreal cy, const qsizetype length, const QSpanData *data)
900{
901 if (Q_UNLIKELY(!data->fast_matrix))
902 return false;
903
904 qreal fx = (data->m21 * cy + data->m11 * cx + data->dx) * fixed_scale;
905 qreal fy = (data->m22 * cy + data->m12 * cx + data->dy) * fixed_scale;
906 qreal minc = std::min(a: fx, b: fy);
907 qreal maxc = std::max(a: fx, b: fy);
908 fx += std::trunc(x: data->m11 * fixed_scale) * length;
909 fy += std::trunc(x: data->m12 * fixed_scale) * length;
910 minc = std::min(a: minc, b: std::min(a: fx, b: fy));
911 maxc = std::max(a: maxc, b: std::max(a: fx, b: fy));
912
913 return minc >= std::numeric_limits<int>::min() && maxc <= std::numeric_limits<int>::max();
914}
915
916template<TextureBlendType blendType, QPixelLayout::BPP bpp, typename T>
917static void QT_FASTCALL fetchTransformed_fetcher(T *buffer, const QSpanData *data,
918 int y, int x, int length)
919{
920 static_assert(blendType == BlendTransformed || blendType == BlendTransformedTiled);
921 const QTextureData &image = data->texture;
922
923 const qreal cx = x + qreal(0.5);
924 const qreal cy = y + qreal(0.5);
925
926 constexpr bool useFetch = (bpp < QPixelLayout::BPP32) && sizeof(T) == sizeof(uint);
927 const QPixelLayout *layout = &qPixelLayouts[data->texture.format];
928 if (!useFetch)
929 Q_ASSERT(layout->bpp == bpp || (layout->bpp == QPixelLayout::BPP16FPx4 && bpp == QPixelLayout::BPP64));
930 // When templated 'fetch' should be inlined at compile time:
931 const Fetch1PixelFunc fetch1 = (bpp == QPixelLayout::BPPNone) ? fetch1PixelTable[layout->bpp] : Fetch1PixelFunc(fetch1Pixel<bpp>);
932
933 if (canUseFastMatrixPath(cx, cy, length, data)) {
934 // The increment pr x in the scanline
935 int fdx = (int)(data->m11 * fixed_scale);
936 int fdy = (int)(data->m12 * fixed_scale);
937
938 int fx = int((data->m21 * cy
939 + data->m11 * cx + data->dx) * fixed_scale);
940 int fy = int((data->m22 * cy
941 + data->m12 * cx + data->dy) * fixed_scale);
942
943 if (fdy == 0) { // simple scale, no rotation or shear
944 int py = (fy >> 16);
945 fetchTransformed_pixelBounds<blendType>(image.height, image.y1, image.y2 - 1, py);
946 const uchar *src = image.scanLine(y: py);
947
948 int i = 0;
949 if (blendType == BlendTransformed) {
950 int fastLen = length;
951 if (fdx > 0)
952 fastLen = qMin(a: fastLen, b: int((qint64(image.x2 - 1) * fixed_scale - fx) / fdx));
953 else if (fdx < 0)
954 fastLen = qMin(a: fastLen, b: int((qint64(image.x1) * fixed_scale - fx) / fdx));
955
956 for (; i < fastLen; ++i) {
957 int x1 = (fx >> 16);
958 int x2 = x1;
959 fetchTransformed_pixelBounds<blendType>(image.width, image.x1, image.x2 - 1, x1);
960 if (x1 == x2)
961 break;
962 if constexpr (useFetch)
963 buffer[i] = fetch1(src, x1);
964 else
965 buffer[i] = reinterpret_cast<const T*>(src)[x1];
966 fx += fdx;
967 }
968
969 for (; i < fastLen; ++i) {
970 int px = (fx >> 16);
971 if constexpr (useFetch)
972 buffer[i] = fetch1(src, px);
973 else
974 buffer[i] = reinterpret_cast<const T*>(src)[px];
975 fx += fdx;
976 }
977 }
978
979 for (; i < length; ++i) {
980 int px = (fx >> 16);
981 fetchTransformed_pixelBounds<blendType>(image.width, image.x1, image.x2 - 1, px);
982 if constexpr (useFetch)
983 buffer[i] = fetch1(src, px);
984 else
985 buffer[i] = reinterpret_cast<const T*>(src)[px];
986 fx += fdx;
987 }
988 } else { // rotation or shear
989 int i = 0;
990 if (blendType == BlendTransformed) {
991 int fastLen = length;
992 if (fdx > 0)
993 fastLen = qMin(a: fastLen, b: int((qint64(image.x2 - 1) * fixed_scale - fx) / fdx));
994 else if (fdx < 0)
995 fastLen = qMin(a: fastLen, b: int((qint64(image.x1) * fixed_scale - fx) / fdx));
996 if (fdy > 0)
997 fastLen = qMin(a: fastLen, b: int((qint64(image.y2 - 1) * fixed_scale - fy) / fdy));
998 else if (fdy < 0)
999 fastLen = qMin(a: fastLen, b: int((qint64(image.y1) * fixed_scale - fy) / fdy));
1000
1001 for (; i < fastLen; ++i) {
1002 int x1 = (fx >> 16);
1003 int y1 = (fy >> 16);
1004 int x2 = x1;
1005 int y2 = y1;
1006 fetchTransformed_pixelBounds<blendType>(image.width, image.x1, image.x2 - 1, x1);
1007 fetchTransformed_pixelBounds<blendType>(image.height, image.y1, image.y2 - 1, y1);
1008 if (x1 == x2 && y1 == y2)
1009 break;
1010 if constexpr (useFetch)
1011 buffer[i] = fetch1(image.scanLine(y: y1), x1);
1012 else
1013 buffer[i] = reinterpret_cast<const T*>(image.scanLine(y: y1))[x1];
1014 fx += fdx;
1015 fy += fdy;
1016 }
1017
1018 for (; i < fastLen; ++i) {
1019 int px = (fx >> 16);
1020 int py = (fy >> 16);
1021 if constexpr (useFetch)
1022 buffer[i] = fetch1(image.scanLine(y: py), px);
1023 else
1024 buffer[i] = reinterpret_cast<const T*>(image.scanLine(y: py))[px];
1025 fx += fdx;
1026 fy += fdy;
1027 }
1028 }
1029
1030 for (; i < length; ++i) {
1031 int px = (fx >> 16);
1032 int py = (fy >> 16);
1033 fetchTransformed_pixelBounds<blendType>(image.width, image.x1, image.x2 - 1, px);
1034 fetchTransformed_pixelBounds<blendType>(image.height, image.y1, image.y2 - 1, py);
1035 if constexpr (useFetch)
1036 buffer[i] = fetch1(image.scanLine(y: py), px);
1037 else
1038 buffer[i] = reinterpret_cast<const T*>(image.scanLine(y: py))[px];
1039 fx += fdx;
1040 fy += fdy;
1041 }
1042 }
1043 } else {
1044 const qreal fdx = data->m11;
1045 const qreal fdy = data->m12;
1046 const qreal fdw = data->m13;
1047
1048 qreal fx = data->m21 * cy + data->m11 * cx + data->dx;
1049 qreal fy = data->m22 * cy + data->m12 * cx + data->dy;
1050 qreal fw = data->m23 * cy + data->m13 * cx + data->m33;
1051
1052 T *const end = buffer + length;
1053 T *b = buffer;
1054 while (b < end) {
1055 const qreal iw = fw == 0 ? 1 : 1 / fw;
1056 const qreal tx = fx * iw;
1057 const qreal ty = fy * iw;
1058 int px = qFloor(v: tx);
1059 int py = qFloor(v: ty);
1060
1061 fetchTransformed_pixelBounds<blendType>(image.height, image.y1, image.y2 - 1, py);
1062 fetchTransformed_pixelBounds<blendType>(image.width, image.x1, image.x2 - 1, px);
1063 if constexpr (useFetch)
1064 *b = fetch1(image.scanLine(y: py), px);
1065 else
1066 *b = reinterpret_cast<const T*>(image.scanLine(y: py))[px];
1067
1068 fx += fdx;
1069 fy += fdy;
1070 fw += fdw;
1071 //force increment to avoid /0
1072 if (!fw) {
1073 fw += fdw;
1074 }
1075 ++b;
1076 }
1077 }
1078}
1079
1080template<TextureBlendType blendType, QPixelLayout::BPP bpp>
1081static const uint *QT_FASTCALL fetchTransformed(uint *buffer, const Operator *, const QSpanData *data,
1082 int y, int x, int length)
1083{
1084 static_assert(blendType == BlendTransformed || blendType == BlendTransformedTiled);
1085 const QPixelLayout *layout = &qPixelLayouts[data->texture.format];
1086 fetchTransformed_fetcher<blendType, bpp, uint>(buffer, data, y, x, length);
1087 layout->convertToARGB32PM(buffer, length, data->texture.colorTable);
1088 return buffer;
1089}
1090
1091#if QT_CONFIG(raster_64bit)
1092template<TextureBlendType blendType> /* either BlendTransformed or BlendTransformedTiled */
1093static const QRgba64 *QT_FASTCALL fetchTransformed64(QRgba64 *buffer, const Operator *, const QSpanData *data,
1094 int y, int x, int length)
1095{
1096 const QPixelLayout *layout = &qPixelLayouts[data->texture.format];
1097 if (layout->bpp < QPixelLayout::BPP64) {
1098 uint buffer32[BufferSize];
1099 Q_ASSERT(length <= BufferSize);
1100 if (layout->bpp == QPixelLayout::BPP32)
1101 fetchTransformed_fetcher<blendType, QPixelLayout::BPP32, uint>(buffer32, data, y, x, length);
1102 else
1103 fetchTransformed_fetcher<blendType, QPixelLayout::BPPNone, uint>(buffer32, data, y, x, length);
1104 return layout->convertToRGBA64PM(buffer, buffer32, length, data->texture.colorTable, nullptr);
1105 }
1106
1107 fetchTransformed_fetcher<blendType, QPixelLayout::BPP64, quint64>(reinterpret_cast<quint64*>(buffer), data, y, x, length);
1108 if (auto convert = convert64ToRGBA64PM[data->texture.format])
1109 convert(buffer, length);
1110 return buffer;
1111}
1112#endif
1113
1114#if QT_CONFIG(raster_fp)
1115template<TextureBlendType blendType> /* either BlendTransformed or BlendTransformedTiled */
1116static const QRgbaFloat32 *QT_FASTCALL fetchTransformedFP(QRgbaFloat32 *buffer, const Operator *, const QSpanData *data,
1117 int y, int x, int length)
1118{
1119 const QPixelLayout *layout = &qPixelLayouts[data->texture.format];
1120 if (layout->bpp < QPixelLayout::BPP64) {
1121 uint buffer32[BufferSize];
1122 Q_ASSERT(length <= BufferSize);
1123 if (layout->bpp == QPixelLayout::BPP32)
1124 fetchTransformed_fetcher<blendType, QPixelLayout::BPP32, uint>(buffer32, data, y, x, length);
1125 else
1126 fetchTransformed_fetcher<blendType, QPixelLayout::BPPNone, uint>(buffer32, data, y, x, length);
1127 qConvertToRGBA32F[data->texture.format](buffer, buffer32, length, data->texture.colorTable, nullptr);
1128 } else if (layout->bpp < QPixelLayout::BPP32FPx4) {
1129 quint64 buffer64[BufferSize];
1130 fetchTransformed_fetcher<blendType, QPixelLayout::BPP64, quint64>(buffer64, data, y, x, length);
1131 convert64ToRGBA32F[data->texture.format](buffer, buffer64, length);
1132 } else {
1133 fetchTransformed_fetcher<blendType, QPixelLayout::BPP32FPx4, QRgbaFloat32>(buffer, data, y, x, length);
1134 if (data->texture.format == QImage::Format_RGBA32FPx4)
1135 convertRGBA32FToRGBA32FPM(buffer, count: length);
1136 return buffer;
1137 }
1138 return buffer;
1139}
1140#endif
1141
/** \internal
    Bilinear blend of four ARGB pixels (top-left, top-right, bottom-left, bottom-right).
    distx and disty are the horizontal and vertical weights and must be between 0 and 16;
    the four resulting pixel weights always add up to 16*16.
 */
static inline uint interpolate_4_pixels_16(uint tl, uint tr, uint bl, uint br, uint distx, uint disty)
{
    constexpr uint rbMask = 0x00ff00ff;
    constexpr uint agMask = 0xff00ff00;

    // With idistx = 16 - distx and idisty = 16 - disty:
    //   distx  * disty  -> bottom-right
    //   distx  * idisty -> top-right    = 16*distx - distx*disty
    //   idistx * disty  -> bottom-left  = 16*disty - distx*disty
    //   idistx * idisty -> top-left     = 16*16 - 16*distx - 16*disty + distx*disty
    const uint weightBR = distx * disty;
    const uint weightTR = distx * 16 - weightBR;
    const uint weightBL = disty * 16 - weightBR;
    const uint weightTL = 16 * 16 - 16 * distx - 16 * disty + weightBR;

    // Red/blue and alpha/green are processed as two pairs of 8-bit channels, each channel
    // sitting in its own 16-bit half so the products cannot spill into the neighbour.
    const uint sumRB = (tl & rbMask) * weightTL
                     + (tr & rbMask) * weightTR
                     + (bl & rbMask) * weightBL
                     + (br & rbMask) * weightBR;
    const uint sumAG = ((tl & agMask) >> 8) * weightTL
                     + ((tr & agMask) >> 8) * weightTR
                     + ((bl & agMask) >> 8) * weightBL
                     + ((br & agMask) >> 8) * weightBR;

    return ((sumRB >> 8) & rbMask) | (sumAG & agMask);
}
1161
#if defined(__SSE2__)
// SSE2 counterpart of interpolate_4_pixels_16(): blends four output pixels at once.
//   tl, tr, bl, br  - __m128i values, each holding four 32-bit source pixels
//   distx, disty    - horizontal/vertical weights, one per 16-bit lane
//                     (NOTE(review): expected to be in 0..16 like the scalar version - confirm)
//   colorMask       - mask selecting the low byte of every 16-bit lane
//                     (callers in this file pass 0x00ff00ff per 32-bit element)
//   v_256           - the constant 256 (16*16) in every 16-bit lane
//   b               - destination; the four results are written with an unaligned store
// The four weights are derived from distx/disty, each pixel is split into its
// alpha/green and red/blue halves, multiplied by its weight (low 16 bits of each
// product are kept), summed, and the two halves are merged back into 32-bit pixels.
#define interpolate_4_pixels_16_sse2(tl, tr, bl, br, distx, disty, colorMask, v_256, b) \
{ \
    const __m128i dxdy = _mm_mullo_epi16 (distx, disty); \
    const __m128i distx_ = _mm_slli_epi16(distx, 4); \
    const __m128i disty_ = _mm_slli_epi16(disty, 4); \
    const __m128i idxidy = _mm_add_epi16(dxdy, _mm_sub_epi16(v_256, _mm_add_epi16(distx_, disty_))); \
    const __m128i dxidy = _mm_sub_epi16(distx_, dxdy); \
    const __m128i idxdy = _mm_sub_epi16(disty_, dxdy); \
 \
    __m128i tlAG = _mm_srli_epi16(tl, 8); \
    __m128i tlRB = _mm_and_si128(tl, colorMask); \
    __m128i trAG = _mm_srli_epi16(tr, 8); \
    __m128i trRB = _mm_and_si128(tr, colorMask); \
    __m128i blAG = _mm_srli_epi16(bl, 8); \
    __m128i blRB = _mm_and_si128(bl, colorMask); \
    __m128i brAG = _mm_srli_epi16(br, 8); \
    __m128i brRB = _mm_and_si128(br, colorMask); \
 \
    tlAG = _mm_mullo_epi16(tlAG, idxidy); \
    tlRB = _mm_mullo_epi16(tlRB, idxidy); \
    trAG = _mm_mullo_epi16(trAG, dxidy); \
    trRB = _mm_mullo_epi16(trRB, dxidy); \
    blAG = _mm_mullo_epi16(blAG, idxdy); \
    blRB = _mm_mullo_epi16(blRB, idxdy); \
    brAG = _mm_mullo_epi16(brAG, dxdy); \
    brRB = _mm_mullo_epi16(brRB, dxdy); \
 \
    /* Add the values, and shift to only keep 8 significant bits per colors */ \
    __m128i rAG =_mm_add_epi16(_mm_add_epi16(tlAG, trAG), _mm_add_epi16(blAG, brAG)); \
    __m128i rRB =_mm_add_epi16(_mm_add_epi16(tlRB, trRB), _mm_add_epi16(blRB, brRB)); \
    rAG = _mm_andnot_si128(colorMask, rAG); \
    rRB = _mm_srli_epi16(rRB, 8); \
    _mm_storeu_si128((__m128i*)(b), _mm_or_si128(rAG, rRB)); \
}
#endif
1198
#if defined(__ARM_NEON__)
// NEON counterpart of interpolate_4_pixels_16(): blends four output pixels at once.
//   tl, tr, bl, br  - int16x8_t values, each holding four 32-bit source pixels
//   distx, disty    - horizontal/vertical weights, one per 16-bit lane
//                     (NOTE(review): expected to be in 0..16 like the scalar version - confirm)
//   disty_          - disty shifted left by 4 (i.e. 16*disty), supplied by the caller
//   colorMask       - mask selecting the low byte of every 16-bit lane
//   invColorMask    - mask selecting the high byte of every 16-bit lane
//                     (the caller in this file passes the bitwise inverse of colorMask)
//   v_256           - the constant 256 (16*16) in every 16-bit lane
//   b               - destination for the four resulting pixels
// Each pixel is split into its alpha/green and red/blue halves, the halves are
// accumulated with their weights via multiply-accumulate, and the two sums are merged
// back into 32-bit pixels before being stored.
#define interpolate_4_pixels_16_neon(tl, tr, bl, br, distx, disty, disty_, colorMask, invColorMask, v_256, b) \
{ \
    const int16x8_t dxdy = vmulq_s16(distx, disty); \
    const int16x8_t distx_ = vshlq_n_s16(distx, 4); \
    const int16x8_t idxidy = vaddq_s16(dxdy, vsubq_s16(v_256, vaddq_s16(distx_, disty_))); \
    const int16x8_t dxidy = vsubq_s16(distx_, dxdy); \
    const int16x8_t idxdy = vsubq_s16(disty_, dxdy); \
 \
    int16x8_t tlAG = vreinterpretq_s16_u16(vshrq_n_u16(vreinterpretq_u16_s16(tl), 8)); \
    int16x8_t tlRB = vandq_s16(tl, colorMask); \
    int16x8_t trAG = vreinterpretq_s16_u16(vshrq_n_u16(vreinterpretq_u16_s16(tr), 8)); \
    int16x8_t trRB = vandq_s16(tr, colorMask); \
    int16x8_t blAG = vreinterpretq_s16_u16(vshrq_n_u16(vreinterpretq_u16_s16(bl), 8)); \
    int16x8_t blRB = vandq_s16(bl, colorMask); \
    int16x8_t brAG = vreinterpretq_s16_u16(vshrq_n_u16(vreinterpretq_u16_s16(br), 8)); \
    int16x8_t brRB = vandq_s16(br, colorMask); \
 \
    int16x8_t rAG = vmulq_s16(tlAG, idxidy); \
    int16x8_t rRB = vmulq_s16(tlRB, idxidy); \
    rAG = vmlaq_s16(rAG, trAG, dxidy); \
    rRB = vmlaq_s16(rRB, trRB, dxidy); \
    rAG = vmlaq_s16(rAG, blAG, idxdy); \
    rRB = vmlaq_s16(rRB, blRB, idxdy); \
    rAG = vmlaq_s16(rAG, brAG, dxdy); \
    rRB = vmlaq_s16(rRB, brRB, dxdy); \
 \
    rAG = vandq_s16(invColorMask, rAG); \
    rRB = vreinterpretq_s16_u16(vshrq_n_u16(vreinterpretq_u16_s16(rRB), 8)); \
    vst1q_s16((int16_t*)(b), vorrq_s16(rAG, rRB)); \
}
#endif
1231
// Computes, for one axis, the pair of neighbouring source indices (v1, v2) used by the
// bilinear fetchers. On entry v1 holds the raw index; on return v1 and v2 are both valid
// indices. Only declared here: the behaviour is provided by explicit specializations
// (wrapping at 'max' for BlendTransformedBilinearTiled, clamping to [l1, l2] for
// BlendTransformedBilinear).
template<TextureBlendType blendType>
void fetchTransformedBilinear_pixelBounds(int max, int l1, int l2, int &v1, int &v2);
1234
1235template<>
1236inline void fetchTransformedBilinear_pixelBounds<BlendTransformedBilinearTiled>(int max, int, int, int &v1, int &v2)
1237{
1238 v1 %= max;
1239 if (v1 < 0)
1240 v1 += max;
1241 v2 = v1 + 1;
1242 if (v2 == max)
1243 v2 = 0;
1244 Q_ASSERT(v1 >= 0 && v1 < max);
1245 Q_ASSERT(v2 >= 0 && v2 < max);
1246}
1247
1248template<>
1249inline void fetchTransformedBilinear_pixelBounds<BlendTransformedBilinear>(int, int l1, int l2, int &v1, int &v2)
1250{
1251 if (v1 < l1)
1252 v2 = v1 = l1;
1253 else if (v1 >= l2)
1254 v2 = v1 = l2;
1255 else
1256 v2 = v1 + 1;
1257 Q_ASSERT(v1 >= l1 && v1 <= l2);
1258 Q_ASSERT(v2 >= l1 && v2 <= l2);
1259}
1260
// Kinds of fast (fixed-point) bilinear transform. The enumerator names parallel the
// fetchTransformedBilinearARGB32PM_*_helper functions defined in this file.
// NOTE(review): presumably used to index a table of BilinearFastTransformHelper
// functions - confirm at the use site.
enum FastTransformTypes {
    SimpleScaleTransform,
    UpscaleTransform,
    DownscaleTransform,
    RotateTransform,
    FastRotateTransform,
    NFastTransformTypes // not a transform: number of enumerators above
};
1269
1270// Completes the partial interpolation stored in IntermediateBuffer.
1271// by performing the x-axis interpolation and joining the RB and AG buffers.
1272static void QT_FASTCALL intermediate_adder(uint *b, uint *end, const IntermediateBuffer &intermediate, int offset, int &fx, int fdx)
1273{
1274#if defined(QT_COMPILER_SUPPORTS_AVX2)
1275 extern void QT_FASTCALL intermediate_adder_avx2(uint *b, uint *end, const IntermediateBuffer &intermediate, int offset, int &fx, int fdx);
1276 if (qCpuHasFeature(ArchHaswell))
1277 return intermediate_adder_avx2(b, end, intermediate, offset, fx, fdx);
1278#endif
1279
1280 // Switch to intermediate buffer coordinates
1281 fx -= offset * fixed_scale;
1282
1283 while (b < end) {
1284 const int x = (fx >> 16);
1285
1286 const uint distx = (fx & 0x0000ffff) >> 8;
1287 const uint idistx = 256 - distx;
1288 const uint rb = (intermediate.buffer_rb[x] * idistx + intermediate.buffer_rb[x + 1] * distx) & 0xff00ff00;
1289 const uint ag = (intermediate.buffer_ag[x] * idistx + intermediate.buffer_ag[x + 1] * distx) & 0xff00ff00;
1290 *b = (rb >> 8) | ag;
1291 b++;
1292 fx += fdx;
1293 }
1294 fx += offset * fixed_scale;
1295}
1296
// Common signature of the fetchTransformedBilinearARGB32PM_*_helper functions: fill the
// output range [b, end) from 'image', starting at the 16.16 fixed-point source position
// (fx, fy) and stepping by (fdx, fdy) per pixel. fx and fy are taken by reference so a
// helper can leave them advanced for the caller.
typedef void (QT_FASTCALL *BilinearFastTransformHelper)(uint *b, uint *end, const QTextureData &image, int &fx, int &fy, int fdx, int fdy);
1298
1299template<TextureBlendType blendType>
1300static void QT_FASTCALL fetchTransformedBilinearARGB32PM_simple_scale_helper(uint *b, uint *end, const QTextureData &image,
1301 int &fx, int &fy, int fdx, int /*fdy*/)
1302{
1303 int y1 = (fy >> 16);
1304 int y2;
1305 fetchTransformedBilinear_pixelBounds<blendType>(image.height, image.y1, image.y2 - 1, y1, y2);
1306 const uint *s1 = (const uint *)image.scanLine(y: y1);
1307 const uint *s2 = (const uint *)image.scanLine(y: y2);
1308
1309 const int disty = (fy & 0x0000ffff) >> 8;
1310 const int idisty = 256 - disty;
1311 const int length = end - b;
1312
1313 // The intermediate buffer is generated in the positive direction
1314 const int adjust = (fdx < 0) ? fdx * length : 0;
1315 const int offset = (fx + adjust) >> 16;
1316 int x = offset;
1317
1318 IntermediateBuffer intermediate;
1319 // count is the size used in the intermediate.buffer.
1320 int count = (qint64(length) * qAbs(t: fdx) + fixed_scale - 1) / fixed_scale + 2;
1321 // length is supposed to be <= BufferSize either because data->m11 < 1 or
1322 // data->m11 < 2, and any larger buffers split
1323 Q_ASSERT(count <= BufferSize + 2);
1324 int f = 0;
1325 int lim = count;
1326 if (blendType == BlendTransformedBilinearTiled) {
1327 x %= image.width;
1328 if (x < 0) x += image.width;
1329 } else {
1330 lim = qMin(a: count, b: image.x2 - x);
1331 if (x < image.x1) {
1332 Q_ASSERT(x < image.x2);
1333 uint t = s1[image.x1];
1334 uint b = s2[image.x1];
1335 quint32 rb = (((t & 0xff00ff) * idisty + (b & 0xff00ff) * disty) >> 8) & 0xff00ff;
1336 quint32 ag = ((((t>>8) & 0xff00ff) * idisty + ((b>>8) & 0xff00ff) * disty) >> 8) & 0xff00ff;
1337 do {
1338 intermediate.buffer_rb[f] = rb;
1339 intermediate.buffer_ag[f] = ag;
1340 f++;
1341 x++;
1342 } while (x < image.x1 && f < lim);
1343 }
1344 }
1345
1346 if (blendType != BlendTransformedBilinearTiled) {
1347#if defined(__SSE2__)
1348 const __m128i disty_ = _mm_set1_epi16(w: disty);
1349 const __m128i idisty_ = _mm_set1_epi16(w: idisty);
1350 const __m128i colorMask = _mm_set1_epi32(i: 0x00ff00ff);
1351
1352 lim -= 3;
1353 for (; f < lim; x += 4, f += 4) {
1354 // Load 4 pixels from s1, and split the alpha-green and red-blue component
1355 __m128i top = _mm_loadu_si128(p: (const __m128i*)((const uint *)(s1)+x));
1356 __m128i topAG = _mm_srli_epi16(a: top, count: 8);
1357 __m128i topRB = _mm_and_si128(a: top, b: colorMask);
1358 // Multiplies each color component by idisty
1359 topAG = _mm_mullo_epi16 (a: topAG, b: idisty_);
1360 topRB = _mm_mullo_epi16 (a: topRB, b: idisty_);
1361
1362 // Same for the s2 vector
1363 __m128i bottom = _mm_loadu_si128(p: (const __m128i*)((const uint *)(s2)+x));
1364 __m128i bottomAG = _mm_srli_epi16(a: bottom, count: 8);
1365 __m128i bottomRB = _mm_and_si128(a: bottom, b: colorMask);
1366 bottomAG = _mm_mullo_epi16 (a: bottomAG, b: disty_);
1367 bottomRB = _mm_mullo_epi16 (a: bottomRB, b: disty_);
1368
1369 // Add the values, and shift to only keep 8 significant bits per colors
1370 __m128i rAG =_mm_add_epi16(a: topAG, b: bottomAG);
1371 rAG = _mm_srli_epi16(a: rAG, count: 8);
1372 _mm_storeu_si128(p: (__m128i*)(&intermediate.buffer_ag[f]), b: rAG);
1373 __m128i rRB =_mm_add_epi16(a: topRB, b: bottomRB);
1374 rRB = _mm_srli_epi16(a: rRB, count: 8);
1375 _mm_storeu_si128(p: (__m128i*)(&intermediate.buffer_rb[f]), b: rRB);
1376 }
1377#elif defined(__ARM_NEON__)
1378 const int16x8_t disty_ = vdupq_n_s16(disty);
1379 const int16x8_t idisty_ = vdupq_n_s16(idisty);
1380 const int16x8_t colorMask = vdupq_n_s16(0x00ff);
1381
1382 lim -= 3;
1383 for (; f < lim; x += 4, f += 4) {
1384 // Load 4 pixels from s1, and split the alpha-green and red-blue component
1385 int16x8_t top = vld1q_s16((int16_t*)((const uint *)(s1)+x));
1386 int16x8_t topAG = vreinterpretq_s16_u16(vshrq_n_u16(vreinterpretq_u16_s16(top), 8));
1387 int16x8_t topRB = vandq_s16(top, colorMask);
1388 // Multiplies each color component by idisty
1389 topAG = vmulq_s16(topAG, idisty_);
1390 topRB = vmulq_s16(topRB, idisty_);
1391
1392 // Same for the s2 vector
1393 int16x8_t bottom = vld1q_s16((int16_t*)((const uint *)(s2)+x));
1394 int16x8_t bottomAG = vreinterpretq_s16_u16(vshrq_n_u16(vreinterpretq_u16_s16(bottom), 8));
1395 int16x8_t bottomRB = vandq_s16(bottom, colorMask);
1396 bottomAG = vmulq_s16(bottomAG, disty_);
1397 bottomRB = vmulq_s16(bottomRB, disty_);
1398
1399 // Add the values, and shift to only keep 8 significant bits per colors
1400 int16x8_t rAG = vaddq_s16(topAG, bottomAG);
1401 rAG = vreinterpretq_s16_u16(vshrq_n_u16(vreinterpretq_u16_s16(rAG), 8));
1402 vst1q_s16((int16_t*)(&intermediate.buffer_ag[f]), rAG);
1403 int16x8_t rRB = vaddq_s16(topRB, bottomRB);
1404 rRB = vreinterpretq_s16_u16(vshrq_n_u16(vreinterpretq_u16_s16(rRB), 8));
1405 vst1q_s16((int16_t*)(&intermediate.buffer_rb[f]), rRB);
1406 }
1407#endif
1408 }
1409 for (; f < count; f++) { // Same as above but without simd
1410 if (blendType == BlendTransformedBilinearTiled) {
1411 if (x >= image.width) x -= image.width;
1412 } else {
1413 x = qMin(a: x, b: image.x2 - 1);
1414 }
1415
1416 uint t = s1[x];
1417 uint b = s2[x];
1418
1419 intermediate.buffer_rb[f] = (((t & 0xff00ff) * idisty + (b & 0xff00ff) * disty) >> 8) & 0xff00ff;
1420 intermediate.buffer_ag[f] = ((((t>>8) & 0xff00ff) * idisty + ((b>>8) & 0xff00ff) * disty) >> 8) & 0xff00ff;
1421 x++;
1422 }
1423
1424 // Now interpolate the values from the intermediate.buffer to get the final result.
1425 intermediate_adder(b, end, intermediate, offset, fx, fdx);
1426}
1427
1428template<TextureBlendType blendType>
1429static void QT_FASTCALL fetchTransformedBilinearARGB32PM_upscale_helper(uint *b, uint *end, const QTextureData &image,
1430 int &fx, int &fy, int fdx, int /*fdy*/)
1431{
1432 int y1 = (fy >> 16);
1433 int y2;
1434 fetchTransformedBilinear_pixelBounds<blendType>(image.height, image.y1, image.y2 - 1, y1, y2);
1435 const uint *s1 = (const uint *)image.scanLine(y: y1);
1436 const uint *s2 = (const uint *)image.scanLine(y: y2);
1437 const int disty = (fy & 0x0000ffff) >> 8;
1438
1439 if (blendType != BlendTransformedBilinearTiled) {
1440 const qint64 min_fx = qint64(image.x1) * fixed_scale;
1441 const qint64 max_fx = qint64(image.x2 - 1) * fixed_scale;
1442 while (b < end) {
1443 int x1 = (fx >> 16);
1444 int x2;
1445 fetchTransformedBilinear_pixelBounds<blendType>(image.width, image.x1, image.x2 - 1, x1, x2);
1446 if (x1 != x2)
1447 break;
1448 uint top = s1[x1];
1449 uint bot = s2[x1];
1450 *b = INTERPOLATE_PIXEL_256(x: top, a: 256 - disty, y: bot, b: disty);
1451 fx += fdx;
1452 ++b;
1453 }
1454 uint *boundedEnd = end;
1455 if (fdx > 0)
1456 boundedEnd = qMin(a: boundedEnd, b: b + (max_fx - fx) / fdx);
1457 else if (fdx < 0)
1458 boundedEnd = qMin(a: boundedEnd, b: b + (min_fx - fx) / fdx);
1459
1460 // A fast middle part without boundary checks
1461 while (b < boundedEnd) {
1462 int x = (fx >> 16);
1463 int distx = (fx & 0x0000ffff) >> 8;
1464 *b = interpolate_4_pixels(t: s1 + x, b: s2 + x, distx, disty);
1465 fx += fdx;
1466 ++b;
1467 }
1468 }
1469
1470 while (b < end) {
1471 int x1 = (fx >> 16);
1472 int x2;
1473 fetchTransformedBilinear_pixelBounds<blendType>(image.width, image.x1, image.x2 - 1 , x1, x2);
1474 uint tl = s1[x1];
1475 uint tr = s1[x2];
1476 uint bl = s2[x1];
1477 uint br = s2[x2];
1478 int distx = (fx & 0x0000ffff) >> 8;
1479 *b = interpolate_4_pixels(tl, tr, bl, br, distx, disty);
1480
1481 fx += fdx;
1482 ++b;
1483 }
1484}
1485
1486template<TextureBlendType blendType>
1487static void QT_FASTCALL fetchTransformedBilinearARGB32PM_downscale_helper(uint *b, uint *end, const QTextureData &image,
1488 int &fx, int &fy, int fdx, int /*fdy*/)
1489{
1490 int y1 = (fy >> 16);
1491 int y2;
1492 fetchTransformedBilinear_pixelBounds<blendType>(image.height, image.y1, image.y2 - 1, y1, y2);
1493 const uint *s1 = (const uint *)image.scanLine(y: y1);
1494 const uint *s2 = (const uint *)image.scanLine(y: y2);
1495 const int disty8 = (fy & 0x0000ffff) >> 8;
1496 const int disty4 = (disty8 + 0x08) >> 4;
1497
1498 if (blendType != BlendTransformedBilinearTiled) {
1499 const qint64 min_fx = qint64(image.x1) * fixed_scale;
1500 const qint64 max_fx = qint64(image.x2 - 1) * fixed_scale;
1501 while (b < end) {
1502 int x1 = (fx >> 16);
1503 int x2;
1504 fetchTransformedBilinear_pixelBounds<blendType>(image.width, image.x1, image.x2 - 1, x1, x2);
1505 if (x1 != x2)
1506 break;
1507 uint top = s1[x1];
1508 uint bot = s2[x1];
1509 *b = INTERPOLATE_PIXEL_256(x: top, a: 256 - disty8, y: bot, b: disty8);
1510 fx += fdx;
1511 ++b;
1512 }
1513 uint *boundedEnd = end;
1514 if (fdx > 0)
1515 boundedEnd = qMin(a: boundedEnd, b: b + (max_fx - fx) / fdx);
1516 else if (fdx < 0)
1517 boundedEnd = qMin(a: boundedEnd, b: b + (min_fx - fx) / fdx);
1518 // A fast middle part without boundary checks
1519#if defined(__SSE2__)
1520 const __m128i colorMask = _mm_set1_epi32(i: 0x00ff00ff);
1521 const __m128i v_256 = _mm_set1_epi16(w: 256);
1522 const __m128i v_disty = _mm_set1_epi16(w: disty4);
1523 const __m128i v_fdx = _mm_set1_epi32(i: fdx*4);
1524 const __m128i v_fx_r = _mm_set1_epi32(i: 0x8);
1525 __m128i v_fx = _mm_setr_epi32(i0: fx, i1: fx + fdx, i2: fx + fdx + fdx, i3: fx + fdx + fdx + fdx);
1526
1527 while (b < boundedEnd - 3) {
1528 __m128i offset = _mm_srli_epi32(a: v_fx, count: 16);
1529 const int offset0 = _mm_cvtsi128_si32(a: offset); offset = _mm_srli_si128(offset, 4);
1530 const int offset1 = _mm_cvtsi128_si32(a: offset); offset = _mm_srli_si128(offset, 4);
1531 const int offset2 = _mm_cvtsi128_si32(a: offset); offset = _mm_srli_si128(offset, 4);
1532 const int offset3 = _mm_cvtsi128_si32(a: offset);
1533 const __m128i tl = _mm_setr_epi32(i0: s1[offset0], i1: s1[offset1], i2: s1[offset2], i3: s1[offset3]);
1534 const __m128i tr = _mm_setr_epi32(i0: s1[offset0 + 1], i1: s1[offset1 + 1], i2: s1[offset2 + 1], i3: s1[offset3 + 1]);
1535 const __m128i bl = _mm_setr_epi32(i0: s2[offset0], i1: s2[offset1], i2: s2[offset2], i3: s2[offset3]);
1536 const __m128i br = _mm_setr_epi32(i0: s2[offset0 + 1], i1: s2[offset1 + 1], i2: s2[offset2 + 1], i3: s2[offset3 + 1]);
1537
1538 __m128i v_distx = _mm_srli_epi16(a: v_fx, count: 8);
1539 v_distx = _mm_srli_epi16(a: _mm_add_epi32(a: v_distx, b: v_fx_r), count: 4);
1540 v_distx = _mm_shufflehi_epi16(v_distx, _MM_SHUFFLE(2,2,0,0));
1541 v_distx = _mm_shufflelo_epi16(v_distx, _MM_SHUFFLE(2,2,0,0));
1542
1543 interpolate_4_pixels_16_sse2(tl, tr, bl, br, v_distx, v_disty, colorMask, v_256, b);
1544 b += 4;
1545 v_fx = _mm_add_epi32(a: v_fx, b: v_fdx);
1546 }
1547 fx = _mm_cvtsi128_si32(a: v_fx);
1548#elif defined(__ARM_NEON__)
1549 const int16x8_t colorMask = vdupq_n_s16(0x00ff);
1550 const int16x8_t invColorMask = vmvnq_s16(colorMask);
1551 const int16x8_t v_256 = vdupq_n_s16(256);
1552 const int16x8_t v_disty = vdupq_n_s16(disty4);
1553 const int16x8_t v_disty_ = vshlq_n_s16(v_disty, 4);
1554 int32x4_t v_fdx = vdupq_n_s32(fdx*4);
1555
1556 int32x4_t v_fx = vmovq_n_s32(fx);
1557 v_fx = vsetq_lane_s32(fx + fdx, v_fx, 1);
1558 v_fx = vsetq_lane_s32(fx + fdx * 2, v_fx, 2);
1559 v_fx = vsetq_lane_s32(fx + fdx * 3, v_fx, 3);
1560
1561 const int32x4_t v_ffff_mask = vdupq_n_s32(0x0000ffff);
1562 const int32x4_t v_fx_r = vdupq_n_s32(0x0800);
1563
1564 while (b < boundedEnd - 3) {
1565 uint32x4x2_t v_top, v_bot;
1566
1567 int x1 = (fx >> 16);
1568 fx += fdx;
1569 v_top = vld2q_lane_u32(s1 + x1, v_top, 0);
1570 v_bot = vld2q_lane_u32(s2 + x1, v_bot, 0);
1571 x1 = (fx >> 16);
1572 fx += fdx;
1573 v_top = vld2q_lane_u32(s1 + x1, v_top, 1);
1574 v_bot = vld2q_lane_u32(s2 + x1, v_bot, 1);
1575 x1 = (fx >> 16);
1576 fx += fdx;
1577 v_top = vld2q_lane_u32(s1 + x1, v_top, 2);
1578 v_bot = vld2q_lane_u32(s2 + x1, v_bot, 2);
1579 x1 = (fx >> 16);
1580 fx += fdx;
1581 v_top = vld2q_lane_u32(s1 + x1, v_top, 3);
1582 v_bot = vld2q_lane_u32(s2 + x1, v_bot, 3);
1583
1584 int32x4_t v_distx = vshrq_n_s32(vaddq_s32(vandq_s32(v_fx, v_ffff_mask), v_fx_r), 12);
1585 v_distx = vorrq_s32(v_distx, vshlq_n_s32(v_distx, 16));
1586
1587 interpolate_4_pixels_16_neon(
1588 vreinterpretq_s16_u32(v_top.val[0]), vreinterpretq_s16_u32(v_top.val[1]),
1589 vreinterpretq_s16_u32(v_bot.val[0]), vreinterpretq_s16_u32(v_bot.val[1]),
1590 vreinterpretq_s16_s32(v_distx), v_disty, v_disty_,
1591 colorMask, invColorMask, v_256, b);
1592 b+=4;
1593 v_fx = vaddq_s32(v_fx, v_fdx);
1594 }
1595#endif
1596 while (b < boundedEnd) {
1597 int x = (fx >> 16);
1598 if (hasFastInterpolate4()) {
1599 int distx8 = (fx & 0x0000ffff) >> 8;
1600 *b = interpolate_4_pixels(t: s1 + x, b: s2 + x, distx: distx8, disty: disty8);
1601 } else {
1602 int distx4 = ((fx & 0x0000ffff) + 0x0800) >> 12;
1603 *b = interpolate_4_pixels_16(tl: s1[x], tr: s1[x + 1], bl: s2[x], br: s2[x + 1], distx: distx4, disty: disty4);
1604 }
1605 fx += fdx;
1606 ++b;
1607 }
1608 }
1609
1610 while (b < end) {
1611 int x1 = (fx >> 16);
1612 int x2;
1613 fetchTransformedBilinear_pixelBounds<blendType>(image.width, image.x1, image.x2 - 1, x1, x2);
1614 uint tl = s1[x1];
1615 uint tr = s1[x2];
1616 uint bl = s2[x1];
1617 uint br = s2[x2];
1618 if (hasFastInterpolate4()) {
1619 int distx8 = (fx & 0x0000ffff) >> 8;
1620 *b = interpolate_4_pixels(tl, tr, bl, br, distx: distx8, disty: disty8);
1621 } else {
1622 int distx4 = ((fx & 0x0000ffff) + 0x0800) >> 12;
1623 *b = interpolate_4_pixels_16(tl, tr, bl, br, distx: distx4, disty: disty4);
1624 }
1625 fx += fdx;
1626 ++b;
1627 }
1628}
1629
1630template<TextureBlendType blendType>
1631static void QT_FASTCALL fetchTransformedBilinearARGB32PM_rotate_helper(uint *b, uint *end, const QTextureData &image,
1632 int &fx, int &fy, int fdx, int fdy)
1633{
1634 // if we are zooming more than 8 times, we use 8bit precision for the position.
1635 while (b < end) {
1636 int x1 = (fx >> 16);
1637 int x2;
1638 int y1 = (fy >> 16);
1639 int y2;
1640
1641 fetchTransformedBilinear_pixelBounds<blendType>(image.width, image.x1, image.x2 - 1, x1, x2);
1642 fetchTransformedBilinear_pixelBounds<blendType>(image.height, image.y1, image.y2 - 1, y1, y2);
1643
1644 const uint *s1 = (const uint *)image.scanLine(y: y1);
1645 const uint *s2 = (const uint *)image.scanLine(y: y2);
1646
1647 uint tl = s1[x1];
1648 uint tr = s1[x2];
1649 uint bl = s2[x1];
1650 uint br = s2[x2];
1651
1652 int distx = (fx & 0x0000ffff) >> 8;
1653 int disty = (fy & 0x0000ffff) >> 8;
1654
1655 *b = interpolate_4_pixels(tl, tr, bl, br, distx, disty);
1656
1657 fx += fdx;
1658 fy += fdy;
1659 ++b;
1660 }
1661}
1662
1663template<TextureBlendType blendType>
1664static void QT_FASTCALL fetchTransformedBilinearARGB32PM_fast_rotate_helper(uint *b, uint *end, const QTextureData &image,
1665 int &fx, int &fy, int fdx, int fdy)
1666{
1667 //we are zooming less than 8x, use 4bit precision
1668 if (blendType != BlendTransformedBilinearTiled) {
1669 const qint64 min_fx = qint64(image.x1) * fixed_scale;
1670 const qint64 max_fx = qint64(image.x2 - 1) * fixed_scale;
1671 const qint64 min_fy = qint64(image.y1) * fixed_scale;
1672 const qint64 max_fy = qint64(image.y2 - 1) * fixed_scale;
1673 // first handle the possibly bounded part in the beginning
1674 while (b < end) {
1675 int x1 = (fx >> 16);
1676 int x2;
1677 int y1 = (fy >> 16);
1678 int y2;
1679 fetchTransformedBilinear_pixelBounds<blendType>(image.width, image.x1, image.x2 - 1, x1, x2);
1680 fetchTransformedBilinear_pixelBounds<blendType>(image.height, image.y1, image.y2 - 1, y1, y2);
1681 if (x1 != x2 && y1 != y2)
1682 break;
1683 const uint *s1 = (const uint *)image.scanLine(y: y1);
1684 const uint *s2 = (const uint *)image.scanLine(y: y2);
1685 uint tl = s1[x1];
1686 uint tr = s1[x2];
1687 uint bl = s2[x1];
1688 uint br = s2[x2];
1689 if (hasFastInterpolate4()) {
1690 int distx = (fx & 0x0000ffff) >> 8;
1691 int disty = (fy & 0x0000ffff) >> 8;
1692 *b = interpolate_4_pixels(tl, tr, bl, br, distx, disty);
1693 } else {
1694 int distx = ((fx & 0x0000ffff) + 0x0800) >> 12;
1695 int disty = ((fy & 0x0000ffff) + 0x0800) >> 12;
1696 *b = interpolate_4_pixels_16(tl, tr, bl, br, distx, disty);
1697 }
1698 fx += fdx;
1699 fy += fdy;
1700 ++b;
1701 }
1702 uint *boundedEnd = end;
1703 if (fdx > 0)
1704 boundedEnd = qMin(a: boundedEnd, b: b + (max_fx - fx) / fdx);
1705 else if (fdx < 0)
1706 boundedEnd = qMin(a: boundedEnd, b: b + (min_fx - fx) / fdx);
1707 if (fdy > 0)
1708 boundedEnd = qMin(a: boundedEnd, b: b + (max_fy - fy) / fdy);
1709 else if (fdy < 0)
1710 boundedEnd = qMin(a: boundedEnd, b: b + (min_fy - fy) / fdy);
1711
1712 // until boundedEnd we can now have a fast middle part without boundary checks
1713#if defined(__SSE2__)
1714 const __m128i colorMask = _mm_set1_epi32(i: 0x00ff00ff);
1715 const __m128i v_256 = _mm_set1_epi16(w: 256);
1716 const __m128i v_fdx = _mm_set1_epi32(i: fdx*4);
1717 const __m128i v_fdy = _mm_set1_epi32(i: fdy*4);
1718 const __m128i v_fxy_r = _mm_set1_epi32(i: 0x8);
1719 __m128i v_fx = _mm_setr_epi32(i0: fx, i1: fx + fdx, i2: fx + fdx + fdx, i3: fx + fdx + fdx + fdx);
1720 __m128i v_fy = _mm_setr_epi32(i0: fy, i1: fy + fdy, i2: fy + fdy + fdy, i3: fy + fdy + fdy + fdy);
1721
1722 const uchar *textureData = image.imageData;
1723 const qsizetype bytesPerLine = image.bytesPerLine;
1724 const __m128i vbpl = _mm_shufflelo_epi16(_mm_cvtsi32_si128(bytesPerLine/4), _MM_SHUFFLE(0, 0, 0, 0));
1725
1726 while (b < boundedEnd - 3) {
1727 const __m128i vy = _mm_packs_epi32(a: _mm_srli_epi32(a: v_fy, count: 16), b: _mm_setzero_si128());
1728 // 4x16bit * 4x16bit -> 4x32bit
1729 __m128i offset = _mm_unpacklo_epi16(a: _mm_mullo_epi16(a: vy, b: vbpl), b: _mm_mulhi_epi16(a: vy, b: vbpl));
1730 offset = _mm_add_epi32(a: offset, b: _mm_srli_epi32(a: v_fx, count: 16));
1731 const int offset0 = _mm_cvtsi128_si32(a: offset); offset = _mm_srli_si128(offset, 4);
1732 const int offset1 = _mm_cvtsi128_si32(a: offset); offset = _mm_srli_si128(offset, 4);
1733 const int offset2 = _mm_cvtsi128_si32(a: offset); offset = _mm_srli_si128(offset, 4);
1734 const int offset3 = _mm_cvtsi128_si32(a: offset);
1735 const uint *topData = (const uint *)(textureData);
1736 const __m128i tl = _mm_setr_epi32(i0: topData[offset0], i1: topData[offset1], i2: topData[offset2], i3: topData[offset3]);
1737 const __m128i tr = _mm_setr_epi32(i0: topData[offset0 + 1], i1: topData[offset1 + 1], i2: topData[offset2 + 1], i3: topData[offset3 + 1]);
1738 const uint *bottomData = (const uint *)(textureData + bytesPerLine);
1739 const __m128i bl = _mm_setr_epi32(i0: bottomData[offset0], i1: bottomData[offset1], i2: bottomData[offset2], i3: bottomData[offset3]);
1740 const __m128i br = _mm_setr_epi32(i0: bottomData[offset0 + 1], i1: bottomData[offset1 + 1], i2: bottomData[offset2 + 1], i3: bottomData[offset3 + 1]);
1741
1742 __m128i v_distx = _mm_srli_epi16(a: v_fx, count: 8);
1743 __m128i v_disty = _mm_srli_epi16(a: v_fy, count: 8);
1744 v_distx = _mm_srli_epi16(a: _mm_add_epi32(a: v_distx, b: v_fxy_r), count: 4);
1745 v_disty = _mm_srli_epi16(a: _mm_add_epi32(a: v_disty, b: v_fxy_r), count: 4);
1746 v_distx = _mm_shufflehi_epi16(v_distx, _MM_SHUFFLE(2,2,0,0));
1747 v_distx = _mm_shufflelo_epi16(v_distx, _MM_SHUFFLE(2,2,0,0));
1748 v_disty = _mm_shufflehi_epi16(v_disty, _MM_SHUFFLE(2,2,0,0));
1749 v_disty = _mm_shufflelo_epi16(v_disty, _MM_SHUFFLE(2,2,0,0));
1750
1751 interpolate_4_pixels_16_sse2(tl, tr, bl, br, v_distx, v_disty, colorMask, v_256, b);
1752 b += 4;
1753 v_fx = _mm_add_epi32(a: v_fx, b: v_fdx);
1754 v_fy = _mm_add_epi32(a: v_fy, b: v_fdy);
1755 }
1756 fx = _mm_cvtsi128_si32(a: v_fx);
1757 fy = _mm_cvtsi128_si32(a: v_fy);
1758#elif defined(__ARM_NEON__)
1759 const int16x8_t colorMask = vdupq_n_s16(0x00ff);
1760 const int16x8_t invColorMask = vmvnq_s16(colorMask);
1761 const int16x8_t v_256 = vdupq_n_s16(256);
1762 int32x4_t v_fdx = vdupq_n_s32(fdx * 4);
1763 int32x4_t v_fdy = vdupq_n_s32(fdy * 4);
1764
1765 const uchar *textureData = image.imageData;
1766 const qsizetype bytesPerLine = image.bytesPerLine;
1767
1768 int32x4_t v_fx = vmovq_n_s32(fx);
1769 int32x4_t v_fy = vmovq_n_s32(fy);
1770 v_fx = vsetq_lane_s32(fx + fdx, v_fx, 1);
1771 v_fy = vsetq_lane_s32(fy + fdy, v_fy, 1);
1772 v_fx = vsetq_lane_s32(fx + fdx * 2, v_fx, 2);
1773 v_fy = vsetq_lane_s32(fy + fdy * 2, v_fy, 2);
1774 v_fx = vsetq_lane_s32(fx + fdx * 3, v_fx, 3);
1775 v_fy = vsetq_lane_s32(fy + fdy * 3, v_fy, 3);
1776
1777 const int32x4_t v_ffff_mask = vdupq_n_s32(0x0000ffff);
1778 const int32x4_t v_round = vdupq_n_s32(0x0800);
1779
1780 while (b < boundedEnd - 3) {
1781 uint32x4x2_t v_top, v_bot;
1782
1783 int x1 = (fx >> 16);
1784 int y1 = (fy >> 16);
1785 fx += fdx; fy += fdy;
1786 const uchar *sl = textureData + bytesPerLine * y1;
1787 const uint *s1 = reinterpret_cast<const uint *>(sl);
1788 const uint *s2 = reinterpret_cast<const uint *>(sl + bytesPerLine);
1789 v_top = vld2q_lane_u32(s1 + x1, v_top, 0);
1790 v_bot = vld2q_lane_u32(s2 + x1, v_bot, 0);
1791 x1 = (fx >> 16);
1792 y1 = (fy >> 16);
1793 fx += fdx; fy += fdy;
1794 sl = textureData + bytesPerLine * y1;
1795 s1 = reinterpret_cast<const uint *>(sl);
1796 s2 = reinterpret_cast<const uint *>(sl + bytesPerLine);
1797 v_top = vld2q_lane_u32(s1 + x1, v_top, 1);
1798 v_bot = vld2q_lane_u32(s2 + x1, v_bot, 1);
1799 x1 = (fx >> 16);
1800 y1 = (fy >> 16);
1801 fx += fdx; fy += fdy;
1802 sl = textureData + bytesPerLine * y1;
1803 s1 = reinterpret_cast<const uint *>(sl);
1804 s2 = reinterpret_cast<const uint *>(sl + bytesPerLine);
1805 v_top = vld2q_lane_u32(s1 + x1, v_top, 2);
1806 v_bot = vld2q_lane_u32(s2 + x1, v_bot, 2);
1807 x1 = (fx >> 16);
1808 y1 = (fy >> 16);
1809 fx += fdx; fy += fdy;
1810 sl = textureData + bytesPerLine * y1;
1811 s1 = reinterpret_cast<const uint *>(sl);
1812 s2 = reinterpret_cast<const uint *>(sl + bytesPerLine);
1813 v_top = vld2q_lane_u32(s1 + x1, v_top, 3);
1814 v_bot = vld2q_lane_u32(s2 + x1, v_bot, 3);
1815
1816 int32x4_t v_distx = vshrq_n_s32(vaddq_s32(vandq_s32(v_fx, v_ffff_mask), v_round), 12);
1817 int32x4_t v_disty = vshrq_n_s32(vaddq_s32(vandq_s32(v_fy, v_ffff_mask), v_round), 12);
1818 v_distx = vorrq_s32(v_distx, vshlq_n_s32(v_distx, 16));
1819 v_disty = vorrq_s32(v_disty, vshlq_n_s32(v_disty, 16));
1820 int16x8_t v_disty_ = vshlq_n_s16(vreinterpretq_s16_s32(v_disty), 4);
1821
1822 interpolate_4_pixels_16_neon(
1823 vreinterpretq_s16_u32(v_top.val[0]), vreinterpretq_s16_u32(v_top.val[1]),
1824 vreinterpretq_s16_u32(v_bot.val[0]), vreinterpretq_s16_u32(v_bot.val[1]),
1825 vreinterpretq_s16_s32(v_distx), vreinterpretq_s16_s32(v_disty),
1826 v_disty_, colorMask, invColorMask, v_256, b);
1827 b += 4;
1828 v_fx = vaddq_s32(v_fx, v_fdx);
1829 v_fy = vaddq_s32(v_fy, v_fdy);
1830 }
1831#endif
1832 while (b < boundedEnd) {
1833 int x = (fx >> 16);
1834 int y = (fy >> 16);
1835
1836 const uint *s1 = (const uint *)image.scanLine(y);
1837 const uint *s2 = (const uint *)image.scanLine(y: y + 1);
1838
1839 if (hasFastInterpolate4()) {
1840 int distx = (fx & 0x0000ffff) >> 8;
1841 int disty = (fy & 0x0000ffff) >> 8;
1842 *b = interpolate_4_pixels(t: s1 + x, b: s2 + x, distx, disty);
1843 } else {
1844 int distx = ((fx & 0x0000ffff) + 0x0800) >> 12;
1845 int disty = ((fy & 0x0000ffff) + 0x0800) >> 12;
1846 *b = interpolate_4_pixels_16(tl: s1[x], tr: s1[x + 1], bl: s2[x], br: s2[x + 1], distx, disty);
1847 }
1848
1849 fx += fdx;
1850 fy += fdy;
1851 ++b;
1852 }
1853 }
1854
1855 while (b < end) {
1856 int x1 = (fx >> 16);
1857 int x2;
1858 int y1 = (fy >> 16);
1859 int y2;
1860
1861 fetchTransformedBilinear_pixelBounds<blendType>(image.width, image.x1, image.x2 - 1, x1, x2);
1862 fetchTransformedBilinear_pixelBounds<blendType>(image.height, image.y1, image.y2 - 1, y1, y2);
1863
1864 const uint *s1 = (const uint *)image.scanLine(y: y1);
1865 const uint *s2 = (const uint *)image.scanLine(y: y2);
1866
1867 uint tl = s1[x1];
1868 uint tr = s1[x2];
1869 uint bl = s2[x1];
1870 uint br = s2[x2];
1871
1872 if (hasFastInterpolate4()) {
1873 int distx = (fx & 0x0000ffff) >> 8;
1874 int disty = (fy & 0x0000ffff) >> 8;
1875 *b = interpolate_4_pixels(tl, tr, bl, br, distx, disty);
1876 } else {
1877 int distx = ((fx & 0x0000ffff) + 0x0800) >> 12;
1878 int disty = ((fy & 0x0000ffff) + 0x0800) >> 12;
1879 *b = interpolate_4_pixels_16(tl, tr, bl, br, distx, disty);
1880 }
1881
1882 fx += fdx;
1883 fy += fdy;
1884 ++b;
1885 }
1886}
1887
1888
// Dispatch table of the fixed-point ARGB32PM bilinear helpers.
// First index:  0 = BlendTransformedBilinear, 1 = BlendTransformedBilinearTiled
//               (matches the `tiled` constant computed in fetchTransformedBilinearARGB32PM).
// Second index: the transform kind; callers index it with SimpleScaleTransform,
//               UpscaleTransform, DownscaleTransform, RotateTransform and
//               FastRotateTransform, so the rows below are presumably listed in
//               that enum's order -- confirm against the enum declaration.
// NOTE(review): the table is deliberately not const in this declaration;
// presumably entries can be replaced elsewhere -- confirm before adding const.
static BilinearFastTransformHelper bilinearFastTransformHelperARGB32PM[2][NFastTransformTypes] = {
    {
        fetchTransformedBilinearARGB32PM_simple_scale_helper<BlendTransformedBilinear>,
        fetchTransformedBilinearARGB32PM_upscale_helper<BlendTransformedBilinear>,
        fetchTransformedBilinearARGB32PM_downscale_helper<BlendTransformedBilinear>,
        fetchTransformedBilinearARGB32PM_rotate_helper<BlendTransformedBilinear>,
        fetchTransformedBilinearARGB32PM_fast_rotate_helper<BlendTransformedBilinear>
    },
    {
        fetchTransformedBilinearARGB32PM_simple_scale_helper<BlendTransformedBilinearTiled>,
        fetchTransformedBilinearARGB32PM_upscale_helper<BlendTransformedBilinearTiled>,
        fetchTransformedBilinearARGB32PM_downscale_helper<BlendTransformedBilinearTiled>,
        fetchTransformedBilinearARGB32PM_rotate_helper<BlendTransformedBilinearTiled>,
        fetchTransformedBilinearARGB32PM_fast_rotate_helper<BlendTransformedBilinearTiled>
    }
};
1905
1906template<TextureBlendType blendType> /* blendType = BlendTransformedBilinear or BlendTransformedBilinearTiled */
1907static const uint * QT_FASTCALL fetchTransformedBilinearARGB32PM(uint *buffer, const Operator *,
1908 const QSpanData *data, int y, int x,
1909 int length)
1910{
1911 const qreal cx = x + qreal(0.5);
1912 const qreal cy = y + qreal(0.5);
1913 constexpr int tiled = (blendType == BlendTransformedBilinearTiled) ? 1 : 0;
1914
1915 uint *end = buffer + length;
1916 uint *b = buffer;
1917 if (canUseFastMatrixPath(cx, cy, length, data)) {
1918 // The increment pr x in the scanline
1919 int fdx = (int)(data->m11 * fixed_scale);
1920 int fdy = (int)(data->m12 * fixed_scale);
1921
1922 int fx = int((data->m21 * cy
1923 + data->m11 * cx + data->dx) * fixed_scale);
1924 int fy = int((data->m22 * cy
1925 + data->m12 * cx + data->dy) * fixed_scale);
1926
1927 fx -= half_point;
1928 fy -= half_point;
1929
1930 if (fdy == 0) { // simple scale, no rotation or shear
1931 if (qAbs(t: fdx) <= fixed_scale) {
1932 // simple scale up on X
1933 bilinearFastTransformHelperARGB32PM[tiled][SimpleScaleTransform](b, end, data->texture, fx, fy, fdx, fdy);
1934 } else if (qAbs(t: fdx) <= 2 * fixed_scale) {
1935 // simple scale down on X, less than 2x
1936 const int mid = (length * 2 < BufferSize) ? length : ((length + 1) / 2);
1937 bilinearFastTransformHelperARGB32PM[tiled][SimpleScaleTransform](buffer, buffer + mid, data->texture, fx, fy, fdx, fdy);
1938 if (mid != length)
1939 bilinearFastTransformHelperARGB32PM[tiled][SimpleScaleTransform](buffer + mid, buffer + length, data->texture, fx, fy, fdx, fdy);
1940 } else if (qAbs(t: data->m22) < qreal(1./8.)) {
1941 // scale up more than 8x (on Y)
1942 bilinearFastTransformHelperARGB32PM[tiled][UpscaleTransform](b, end, data->texture, fx, fy, fdx, fdy);
1943 } else {
1944 // scale down on X
1945 bilinearFastTransformHelperARGB32PM[tiled][DownscaleTransform](b, end, data->texture, fx, fy, fdx, fdy);
1946 }
1947 } else { // rotation or shear
1948 if (qAbs(t: data->m11) < qreal(1./8.) || qAbs(t: data->m22) < qreal(1./8.) ) {
1949 // if we are zooming more than 8 times, we use 8bit precision for the position.
1950 bilinearFastTransformHelperARGB32PM[tiled][RotateTransform](b, end, data->texture, fx, fy, fdx, fdy);
1951 } else {
1952 // we are zooming less than 8x, use 4bit precision
1953 bilinearFastTransformHelperARGB32PM[tiled][FastRotateTransform](b, end, data->texture, fx, fy, fdx, fdy);
1954 }
1955 }
1956 } else {
1957 const QTextureData &image = data->texture;
1958
1959 const qreal fdx = data->m11;
1960 const qreal fdy = data->m12;
1961 const qreal fdw = data->m13;
1962
1963 qreal fx = data->m21 * cy + data->m11 * cx + data->dx;
1964 qreal fy = data->m22 * cy + data->m12 * cx + data->dy;
1965 qreal fw = data->m23 * cy + data->m13 * cx + data->m33;
1966
1967 while (b < end) {
1968 const qreal iw = fw == 0 ? 1 : 1 / fw;
1969 const qreal px = fx * iw - qreal(0.5);
1970 const qreal py = fy * iw - qreal(0.5);
1971
1972 int x1 = int(px) - (px < 0);
1973 int x2;
1974 int y1 = int(py) - (py < 0);
1975 int y2;
1976
1977 int distx = int((px - x1) * 256);
1978 int disty = int((py - y1) * 256);
1979
1980 fetchTransformedBilinear_pixelBounds<blendType>(image.width, image.x1, image.x2 - 1, x1, x2);
1981 fetchTransformedBilinear_pixelBounds<blendType>(image.height, image.y1, image.y2 - 1, y1, y2);
1982
1983 const uint *s1 = (const uint *)data->texture.scanLine(y: y1);
1984 const uint *s2 = (const uint *)data->texture.scanLine(y: y2);
1985
1986 uint tl = s1[x1];
1987 uint tr = s1[x2];
1988 uint bl = s2[x1];
1989 uint br = s2[x2];
1990
1991 *b = interpolate_4_pixels(tl, tr, bl, br, distx, disty);
1992
1993 fx += fdx;
1994 fy += fdy;
1995 fw += fdw;
1996 //force increment to avoid /0
1997 if (!fw) {
1998 fw += fdw;
1999 }
2000 ++b;
2001 }
2002 }
2003
2004 return buffer;
2005}
2006
2007template<TextureBlendType blendType>
2008static void QT_FASTCALL fetchTransformedBilinear_simple_scale_helper(uint *b, uint *end, const QTextureData &image,
2009 int &fx, int &fy, int fdx, int /*fdy*/)
2010{
2011 const QPixelLayout *layout = &qPixelLayouts[image.format];
2012 const QList<QRgb> *clut = image.colorTable;
2013 const FetchAndConvertPixelsFunc fetch = layout->fetchToARGB32PM;
2014
2015 int y1 = (fy >> 16);
2016 int y2;
2017 fetchTransformedBilinear_pixelBounds<blendType>(image.height, image.y1, image.y2 - 1, y1, y2);
2018 const uchar *s1 = image.scanLine(y: y1);
2019 const uchar *s2 = image.scanLine(y: y2);
2020
2021 const int disty = (fy & 0x0000ffff) >> 8;
2022 const int idisty = 256 - disty;
2023 const int length = end - b;
2024
2025 // The intermediate buffer is generated in the positive direction
2026 const int adjust = (fdx < 0) ? fdx * length : 0;
2027 const int offset = (fx + adjust) >> 16;
2028 int x = offset;
2029
2030 IntermediateBuffer intermediate;
2031 uint *buf1 = intermediate.buffer_rb;
2032 uint *buf2 = intermediate.buffer_ag;
2033 const uint *ptr1;
2034 const uint *ptr2;
2035
2036 int count = (qint64(length) * qAbs(t: fdx) + fixed_scale - 1) / fixed_scale + 2;
2037 Q_ASSERT(count <= BufferSize + 2);
2038
2039 if (blendType == BlendTransformedBilinearTiled) {
2040 x %= image.width;
2041 if (x < 0)
2042 x += image.width;
2043 int len1 = qMin(a: count, b: image.width - x);
2044 int len2 = qMin(a: x, b: count - len1);
2045
2046 ptr1 = fetch(buf1, s1, x, len1, clut, nullptr);
2047 ptr2 = fetch(buf2, s2, x, len1, clut, nullptr);
2048 for (int i = 0; i < len1; ++i) {
2049 uint t = ptr1[i];
2050 uint b = ptr2[i];
2051 buf1[i] = (((t & 0xff00ff) * idisty + (b & 0xff00ff) * disty) >> 8) & 0xff00ff;
2052 buf2[i] = ((((t >> 8) & 0xff00ff) * idisty + ((b >> 8) & 0xff00ff) * disty) >> 8) & 0xff00ff;
2053 }
2054
2055 if (len2) {
2056 ptr1 = fetch(buf1 + len1, s1, 0, len2, clut, nullptr);
2057 ptr2 = fetch(buf2 + len1, s2, 0, len2, clut, nullptr);
2058 for (int i = 0; i < len2; ++i) {
2059 uint t = ptr1[i];
2060 uint b = ptr2[i];
2061 buf1[i + len1] = (((t & 0xff00ff) * idisty + (b & 0xff00ff) * disty) >> 8) & 0xff00ff;
2062 buf2[i + len1] = ((((t >> 8) & 0xff00ff) * idisty + ((b >> 8) & 0xff00ff) * disty) >> 8) & 0xff00ff;
2063 }
2064 }
2065 // Generate the rest by repeatedly repeating the previous set of pixels
2066 for (int i = image.width; i < count; ++i) {
2067 buf1[i] = buf1[i - image.width];
2068 buf2[i] = buf2[i - image.width];
2069 }
2070 } else {
2071 int start = qMax(a: x, b: image.x1);
2072 int end = qMin(a: x + count, b: image.x2);
2073 int len = qMax(a: 1, b: end - start);
2074 int leading = start - x;
2075
2076 ptr1 = fetch(buf1 + leading, s1, start, len, clut, nullptr);
2077 ptr2 = fetch(buf2 + leading, s2, start, len, clut, nullptr);
2078
2079 for (int i = 0; i < len; ++i) {
2080 uint t = ptr1[i];
2081 uint b = ptr2[i];
2082 buf1[i + leading] = (((t & 0xff00ff) * idisty + (b & 0xff00ff) * disty) >> 8) & 0xff00ff;
2083 buf2[i + leading] = ((((t >> 8) & 0xff00ff) * idisty + ((b >> 8) & 0xff00ff) * disty) >> 8) & 0xff00ff;
2084 }
2085
2086 for (int i = 0; i < leading; ++i) {
2087 buf1[i] = buf1[leading];
2088 buf2[i] = buf2[leading];
2089 }
2090 for (int i = leading + len; i < count; ++i) {
2091 buf1[i] = buf1[i - 1];
2092 buf2[i] = buf2[i - 1];
2093 }
2094 }
2095
2096 // Now interpolate the values from the intermediate.buffer to get the final result.
2097 intermediate_adder(b, end, intermediate, offset, fx, fdx);
2098}
2099
2100
2101template<TextureBlendType blendType, QPixelLayout::BPP bpp, typename T>
2102static void QT_FASTCALL fetchTransformedBilinear_fetcher(T *buf1, T *buf2, const int len, const QTextureData &image,
2103 int fx, int fy, const int fdx, const int fdy)
2104{
2105 const QPixelLayout &layout = qPixelLayouts[image.format];
2106 constexpr bool useFetch = (bpp < QPixelLayout::BPP32);
2107 if (useFetch)
2108 Q_ASSERT(sizeof(T) == sizeof(uint));
2109 else
2110 Q_ASSERT(layout.bpp == bpp || (layout.bpp == QPixelLayout::BPP16FPx4 && bpp == QPixelLayout::BPP64));
2111 const Fetch1PixelFunc fetch1 = (bpp == QPixelLayout::BPPNone) ? fetch1PixelTable[layout.bpp] : fetch1Pixel<bpp>;
2112 if (fdy == 0) {
2113 int y1 = (fy >> 16);
2114 int y2;
2115 fetchTransformedBilinear_pixelBounds<blendType>(image.height, image.y1, image.y2 - 1, y1, y2);
2116 const uchar *s1 = image.scanLine(y: y1);
2117 const uchar *s2 = image.scanLine(y: y2);
2118
2119 int i = 0;
2120 if (blendType == BlendTransformedBilinear) {
2121 for (; i < len; ++i) {
2122 int x1 = (fx >> 16);
2123 int x2;
2124 fetchTransformedBilinear_pixelBounds<blendType>(image.width, image.x1, image.x2 - 1, x1, x2);
2125 if (x1 != x2)
2126 break;
2127 if constexpr (useFetch) {
2128 buf1[i * 2 + 0] = buf1[i * 2 + 1] = fetch1(s1, x1);
2129 buf2[i * 2 + 0] = buf2[i * 2 + 1] = fetch1(s2, x1);
2130 } else {
2131 buf1[i * 2 + 0] = buf1[i * 2 + 1] = reinterpret_cast<const T *>(s1)[x1];
2132 buf2[i * 2 + 0] = buf2[i * 2 + 1] = reinterpret_cast<const T *>(s2)[x1];
2133 }
2134 fx += fdx;
2135 }
2136 int fastLen = len;
2137 if (fdx > 0)
2138 fastLen = qMin(a: fastLen, b: int((qint64(image.x2 - 1) * fixed_scale - fx) / fdx));
2139 else if (fdx < 0)
2140 fastLen = qMin(a: fastLen, b: int((qint64(image.x1) * fixed_scale - fx) / fdx));
2141
2142 for (; i < fastLen; ++i) {
2143 int x = (fx >> 16);
2144 if constexpr (useFetch) {
2145 buf1[i * 2 + 0] = fetch1(s1, x);
2146 buf1[i * 2 + 1] = fetch1(s1, x + 1);
2147 buf2[i * 2 + 0] = fetch1(s2, x);
2148 buf2[i * 2 + 1] = fetch1(s2, x + 1);
2149 } else {
2150 buf1[i * 2 + 0] = reinterpret_cast<const T *>(s1)[x];
2151 buf1[i * 2 + 1] = reinterpret_cast<const T *>(s1)[x + 1];
2152 buf2[i * 2 + 0] = reinterpret_cast<const T *>(s2)[x];
2153 buf2[i * 2 + 1] = reinterpret_cast<const T *>(s2)[x + 1];
2154 }
2155 fx += fdx;
2156 }
2157 }
2158
2159 for (; i < len; ++i) {
2160 int x1 = (fx >> 16);
2161 int x2;
2162 fetchTransformedBilinear_pixelBounds<blendType>(image.width, image.x1, image.x2 - 1, x1, x2);
2163 if constexpr (useFetch) {
2164 buf1[i * 2 + 0] = fetch1(s1, x1);
2165 buf1[i * 2 + 1] = fetch1(s1, x2);
2166 buf2[i * 2 + 0] = fetch1(s2, x1);
2167 buf2[i * 2 + 1] = fetch1(s2, x2);
2168 } else {
2169 buf1[i * 2 + 0] = reinterpret_cast<const T *>(s1)[x1];
2170 buf1[i * 2 + 1] = reinterpret_cast<const T *>(s1)[x2];
2171 buf2[i * 2 + 0] = reinterpret_cast<const T *>(s2)[x1];
2172 buf2[i * 2 + 1] = reinterpret_cast<const T *>(s2)[x2];
2173 }
2174 fx += fdx;
2175 }
2176 } else {
2177 int i = 0;
2178 if (blendType == BlendTransformedBilinear) {
2179 for (; i < len; ++i) {
2180 int x1 = (fx >> 16);
2181 int x2;
2182 int y1 = (fy >> 16);
2183 int y2;
2184 fetchTransformedBilinear_pixelBounds<blendType>(image.width, image.x1, image.x2 - 1, x1, x2);
2185 fetchTransformedBilinear_pixelBounds<blendType>(image.height, image.y1, image.y2 - 1, y1, y2);
2186 if (x1 != x2 && y1 != y2)
2187 break;
2188 const uchar *s1 = image.scanLine(y: y1);
2189 const uchar *s2 = image.scanLine(y: y2);
2190 if constexpr (useFetch) {
2191 buf1[i * 2 + 0] = fetch1(s1, x1);
2192 buf1[i * 2 + 1] = fetch1(s1, x2);
2193 buf2[i * 2 + 0] = fetch1(s2, x1);
2194 buf2[i * 2 + 1] = fetch1(s2, x2);
2195 } else {
2196 buf1[i * 2 + 0] = reinterpret_cast<const T *>(s1)[x1];
2197 buf1[i * 2 + 1] = reinterpret_cast<const T *>(s1)[x2];
2198 buf2[i * 2 + 0] = reinterpret_cast<const T *>(s2)[x1];
2199 buf2[i * 2 + 1] = reinterpret_cast<const T *>(s2)[x2];
2200 }
2201 fx += fdx;
2202 fy += fdy;
2203 }
2204 int fastLen = len;
2205 if (fdx > 0)
2206 fastLen = qMin(a: fastLen, b: int((qint64(image.x2 - 1) * fixed_scale - fx) / fdx));
2207 else if (fdx < 0)
2208 fastLen = qMin(a: fastLen, b: int((qint64(image.x1) * fixed_scale - fx) / fdx));
2209 if (fdy > 0)
2210 fastLen = qMin(a: fastLen, b: int((qint64(image.y2 - 1) * fixed_scale - fy) / fdy));
2211 else if (fdy < 0)
2212 fastLen = qMin(a: fastLen, b: int((qint64(image.y1) * fixed_scale - fy) / fdy));
2213
2214 for (; i < fastLen; ++i) {
2215 int x = (fx >> 16);
2216 int y = (fy >> 16);
2217 const uchar *s1 = image.scanLine(y);
2218 const uchar *s2 = s1 + image.bytesPerLine;
2219 if constexpr (useFetch) {
2220 buf1[i * 2 + 0] = fetch1(s1, x);
2221 buf1[i * 2 + 1] = fetch1(s1, x + 1);
2222 buf2[i * 2 + 0] = fetch1(s2, x);
2223 buf2[i * 2 + 1] = fetch1(s2, x + 1);
2224 } else {
2225 buf1[i * 2 + 0] = reinterpret_cast<const T *>(s1)[x];
2226 buf1[i * 2 + 1] = reinterpret_cast<const T *>(s1)[x + 1];
2227 buf2[i * 2 + 0] = reinterpret_cast<const T *>(s2)[x];
2228 buf2[i * 2 + 1] = reinterpret_cast<const T *>(s2)[x + 1];
2229 }
2230 fx += fdx;
2231 fy += fdy;
2232 }
2233 }
2234
2235 for (; i < len; ++i) {
2236 int x1 = (fx >> 16);
2237 int x2;
2238 int y1 = (fy >> 16);
2239 int y2;
2240 fetchTransformedBilinear_pixelBounds<blendType>(image.width, image.x1, image.x2 - 1, x1, x2);
2241 fetchTransformedBilinear_pixelBounds<blendType>(image.height, image.y1, image.y2 - 1, y1, y2);
2242
2243 const uchar *s1 = image.scanLine(y: y1);
2244 const uchar *s2 = image.scanLine(y: y2);
2245 if constexpr (useFetch) {
2246 buf1[i * 2 + 0] = fetch1(s1, x1);
2247 buf1[i * 2 + 1] = fetch1(s1, x2);
2248 buf2[i * 2 + 0] = fetch1(s2, x1);
2249 buf2[i * 2 + 1] = fetch1(s2, x2);
2250 } else {
2251 buf1[i * 2 + 0] = reinterpret_cast<const T *>(s1)[x1];
2252 buf1[i * 2 + 1] = reinterpret_cast<const T *>(s1)[x2];
2253 buf2[i * 2 + 0] = reinterpret_cast<const T *>(s2)[x1];
2254 buf2[i * 2 + 1] = reinterpret_cast<const T *>(s2)[x2];
2255 }
2256 fx += fdx;
2257 fy += fdy;
2258 }
2259 }
2260}
2261
2262template<TextureBlendType blendType, QPixelLayout::BPP bpp, typename T>
2263static void QT_FASTCALL fetchTransformedBilinear_slow_fetcher(T *buf1, T *buf2, ushort *distxs, ushort *distys,
2264 const int len, const QTextureData &image,
2265 qreal &fx, qreal &fy, qreal &fw,
2266 const qreal fdx, const qreal fdy, const qreal fdw)
2267{
2268 const QPixelLayout &layout = qPixelLayouts[image.format];
2269 constexpr bool useFetch = (bpp < QPixelLayout::BPP32);
2270 if (useFetch)
2271 Q_ASSERT(sizeof(T) == sizeof(uint));
2272 else
2273 Q_ASSERT(layout.bpp == bpp);
2274
2275 const Fetch1PixelFunc fetch1 = (bpp == QPixelLayout::BPPNone) ? fetch1PixelTable[layout.bpp] : fetch1Pixel<bpp>;
2276
2277 for (int i = 0; i < len; ++i) {
2278 const qreal iw = fw == 0 ? 16384 : 1 / fw;
2279 const qreal px = fx * iw - qreal(0.5);
2280 const qreal py = fy * iw - qreal(0.5);
2281
2282 int x1 = qFloor(v: px);
2283 int x2;
2284 int y1 = qFloor(v: py);
2285 int y2;
2286
2287 distxs[i] = ushort((px - x1) * (1<<16));
2288 distys[i] = ushort((py - y1) * (1<<16));
2289
2290 fetchTransformedBilinear_pixelBounds<blendType>(image.width, image.x1, image.x2 - 1, x1, x2);
2291 fetchTransformedBilinear_pixelBounds<blendType>(image.height, image.y1, image.y2 - 1, y1, y2);
2292
2293 const uchar *s1 = image.scanLine(y: y1);
2294 const uchar *s2 = image.scanLine(y: y2);
2295 if constexpr (useFetch) {
2296 buf1[i * 2 + 0] = fetch1(s1, x1);
2297 buf1[i * 2 + 1] = fetch1(s1, x2);
2298 buf2[i * 2 + 0] = fetch1(s2, x1);
2299 buf2[i * 2 + 1] = fetch1(s2, x2);
2300 } else {
2301 buf1[i * 2 + 0] = reinterpret_cast<const T *>(s1)[x1];
2302 buf1[i * 2 + 1] = reinterpret_cast<const T *>(s1)[x2];
2303 buf2[i * 2 + 0] = reinterpret_cast<const T *>(s2)[x1];
2304 buf2[i * 2 + 1] = reinterpret_cast<const T *>(s2)[x2];
2305 }
2306
2307 fx += fdx;
2308 fy += fdy;
2309 fw += fdw;
2310 }
2311}
2312
2313// blendType = BlendTransformedBilinear or BlendTransformedBilinearTiled
2314template<TextureBlendType blendType, QPixelLayout::BPP bpp>
2315static const uint *QT_FASTCALL fetchTransformedBilinear(uint *buffer, const Operator *,
2316 const QSpanData *data, int y, int x, int length)
2317{
2318 const QPixelLayout *layout = &qPixelLayouts[data->texture.format];
2319 const QList<QRgb> *clut = data->texture.colorTable;
2320 Q_ASSERT(bpp == QPixelLayout::BPPNone || layout->bpp == bpp);
2321
2322 const qreal cx = x + qreal(0.5);
2323 const qreal cy = y + qreal(0.5);
2324
2325 if (canUseFastMatrixPath(cx, cy, length, data)) {
2326 // The increment pr x in the scanline
2327 int fdx = (int)(data->m11 * fixed_scale);
2328 int fdy = (int)(data->m12 * fixed_scale);
2329
2330 int fx = int((data->m21 * cy + data->m11 * cx + data->dx) * fixed_scale);
2331 int fy = int((data->m22 * cy + data->m12 * cx + data->dy) * fixed_scale);
2332
2333 fx -= half_point;
2334 fy -= half_point;
2335
2336 if (fdy == 0) { // simple scale, no rotation or shear
2337 if (qAbs(t: fdx) <= fixed_scale) { // scale up on X
2338 fetchTransformedBilinear_simple_scale_helper<blendType>(buffer, buffer + length, data->texture, fx, fy, fdx, fdy);
2339 } else if (qAbs(t: fdx) <= 2 * fixed_scale) { // scale down on X less than 2x
2340 const int mid = (length * 2 < BufferSize) ? length : ((length + 1) / 2);
2341 fetchTransformedBilinear_simple_scale_helper<blendType>(buffer, buffer + mid, data->texture, fx, fy, fdx, fdy);
2342 if (mid != length)
2343 fetchTransformedBilinear_simple_scale_helper<blendType>(buffer + mid, buffer + length, data->texture, fx, fy, fdx, fdy);
2344 } else {
2345 const auto fetcher = fetchTransformedBilinear_fetcher<blendType,bpp,uint>;
2346
2347 uint buf1[BufferSize];
2348 uint buf2[BufferSize];
2349 uint *b = buffer;
2350 while (length) {
2351 int len = qMin(a: length, b: BufferSize / 2);
2352 fetcher(buf1, buf2, len, data->texture, fx, fy, fdx, 0);
2353 layout->convertToARGB32PM(buf1, len * 2, clut);
2354 layout->convertToARGB32PM(buf2, len * 2, clut);
2355
2356 if (hasFastInterpolate4() || qAbs(t: data->m22) < qreal(1./8.)) { // scale up more than 8x (on Y)
2357 int disty = (fy & 0x0000ffff) >> 8;
2358 for (int i = 0; i < len; ++i) {
2359 int distx = (fx & 0x0000ffff) >> 8;
2360 b[i] = interpolate_4_pixels(t: buf1 + i * 2, b: buf2 + i * 2, distx, disty);
2361 fx += fdx;
2362 }
2363 } else {
2364 int disty = ((fy & 0x0000ffff) + 0x0800) >> 12;
2365 for (int i = 0; i < len; ++i) {
2366 uint tl = buf1[i * 2 + 0];
2367 uint tr = buf1[i * 2 + 1];
2368 uint bl = buf2[i * 2 + 0];
2369 uint br = buf2[i * 2 + 1];
2370 int distx = ((fx & 0x0000ffff) + 0x0800) >> 12;
2371 b[i] = interpolate_4_pixels_16(tl, tr, bl, br, distx, disty);
2372 fx += fdx;
2373 }
2374 }
2375 length -= len;
2376 b += len;
2377 }
2378 }
2379 } else { // rotation or shear
2380 const auto fetcher = fetchTransformedBilinear_fetcher<blendType,bpp,uint>;
2381
2382 uint buf1[BufferSize];
2383 uint buf2[BufferSize];
2384 uint *b = buffer;
2385 while (length) {
2386 int len = qMin(a: length, b: BufferSize / 2);
2387 fetcher(buf1, buf2, len, data->texture, fx, fy, fdx, fdy);
2388 layout->convertToARGB32PM(buf1, len * 2, clut);
2389 layout->convertToARGB32PM(buf2, len * 2, clut);
2390
2391 if (hasFastInterpolate4() || qAbs(t: data->m11) < qreal(1./8.) || qAbs(t: data->m22) < qreal(1./8.)) {
2392 // If we are zooming more than 8 times, we use 8bit precision for the position.
2393 for (int i = 0; i < len; ++i) {
2394 int distx = (fx & 0x0000ffff) >> 8;
2395 int disty = (fy & 0x0000ffff) >> 8;
2396
2397 b[i] = interpolate_4_pixels(t: buf1 + i * 2, b: buf2 + i * 2, distx, disty);
2398 fx += fdx;
2399 fy += fdy;
2400 }
2401 } else {
2402 // We are zooming less than 8x, use 4bit precision
2403 for (int i = 0; i < len; ++i) {
2404 uint tl = buf1[i * 2 + 0];
2405 uint tr = buf1[i * 2 + 1];
2406 uint bl = buf2[i * 2 + 0];
2407 uint br = buf2[i * 2 + 1];
2408
2409 int distx = ((fx & 0x0000ffff) + 0x0800) >> 12;
2410 int disty = ((fy & 0x0000ffff) + 0x0800) >> 12;
2411
2412 b[i] = interpolate_4_pixels_16(tl, tr, bl, br, distx, disty);
2413 fx += fdx;
2414 fy += fdy;
2415 }
2416 }
2417
2418 length -= len;
2419 b += len;
2420 }
2421 }
2422 } else {
2423 const auto fetcher = fetchTransformedBilinear_slow_fetcher<blendType,bpp,uint>;
2424
2425 const qreal fdx = data->m11;
2426 const qreal fdy = data->m12;
2427 const qreal fdw = data->m13;
2428
2429 qreal fx = data->m21 * cy + data->m11 * cx + data->dx;
2430 qreal fy = data->m22 * cy + data->m12 * cx + data->dy;
2431 qreal fw = data->m23 * cy + data->m13 * cx + data->m33;
2432
2433 uint buf1[BufferSize];
2434 uint buf2[BufferSize];
2435 uint *b = buffer;
2436
2437 ushort distxs[BufferSize / 2];
2438 ushort distys[BufferSize / 2];
2439
2440 while (length) {
2441 const int len = qMin(a: length, b: BufferSize / 2);
2442 fetcher(buf1, buf2, distxs, distys, len, data->texture, fx, fy, fw, fdx, fdy, fdw);
2443
2444 layout->convertToARGB32PM(buf1, len * 2, clut);
2445 layout->convertToARGB32PM(buf2, len * 2, clut);
2446
2447 for (int i = 0; i < len; ++i) {
2448 const int distx = distxs[i] >> 8;
2449 const int disty = distys[i] >> 8;
2450
2451 b[i] = interpolate_4_pixels(t: buf1 + i * 2, b: buf2 + i * 2, distx, disty);
2452 }
2453 length -= len;
2454 b += len;
2455 }
2456 }
2457
2458 return buffer;
2459}
2460
2461#if QT_CONFIG(raster_64bit)
2462template<TextureBlendType blendType>
2463static const QRgba64 *QT_FASTCALL fetchTransformedBilinear64_uint32(QRgba64 *buffer, const QSpanData *data,
2464 int y, int x, int length)
2465{
2466 const QPixelLayout *layout = &qPixelLayouts[data->texture.format];
2467 const auto *clut = data->texture.colorTable;
2468 const auto convert = layout->convertToRGBA64PM;
2469
2470 const qreal cx = x + qreal(0.5);
2471 const qreal cy = y + qreal(0.5);
2472
2473 uint sbuf1[BufferSize];
2474 uint sbuf2[BufferSize];
2475 alignas(8) QRgba64 buf1[BufferSize];
2476 alignas(8) QRgba64 buf2[BufferSize];
2477 QRgba64 *b = buffer;
2478
2479 if (canUseFastMatrixPath(cx, cy, length, data)) {
2480 // The increment pr x in the scanline
2481 const int fdx = (int)(data->m11 * fixed_scale);
2482 const int fdy = (int)(data->m12 * fixed_scale);
2483
2484 int fx = int((data->m21 * cy + data->m11 * cx + data->dx) * fixed_scale);
2485 int fy = int((data->m22 * cy + data->m12 * cx + data->dy) * fixed_scale);
2486
2487 fx -= half_point;
2488 fy -= half_point;
2489
2490 const auto fetcher =
2491 (layout->bpp == QPixelLayout::BPP32)
2492 ? fetchTransformedBilinear_fetcher<blendType, QPixelLayout::BPP32, uint>
2493 : fetchTransformedBilinear_fetcher<blendType, QPixelLayout::BPPNone, uint>;
2494
2495 if (fdy == 0) { //simple scale, no rotation
2496 while (length) {
2497 const int len = qMin(a: length, b: BufferSize / 2);
2498 const int disty = (fy & 0x0000ffff);
2499#if defined(__SSE2__)
2500 const __m128i vdy = _mm_set1_epi16(w: disty);
2501 const __m128i vidy = _mm_set1_epi16(w: 0x10000 - disty);
2502#endif
2503 fetcher(sbuf1, sbuf2, len, data->texture, fx, fy, fdx, fdy);
2504
2505 convert(buf1, sbuf1, len * 2, clut, nullptr);
2506 if (disty)
2507 convert(buf2, sbuf2, len * 2, clut, nullptr);
2508
2509 for (int i = 0; i < len; ++i) {
2510 const int distx = (fx & 0x0000ffff);
2511#if defined(__SSE2__)
2512 __m128i vt = _mm_loadu_si128(p: (const __m128i*)(buf1 + i*2));
2513 if (disty) {
2514 __m128i vb = _mm_loadu_si128(p: (const __m128i*)(buf2 + i*2));
2515 vt = _mm_mulhi_epu16(a: vt, b: vidy);
2516 vb = _mm_mulhi_epu16(a: vb, b: vdy);
2517 vt = _mm_add_epi16(a: vt, b: vb);
2518 }
2519 if (distx) {
2520 const __m128i vdistx = _mm_shufflelo_epi16(_mm_cvtsi32_si128(distx), _MM_SHUFFLE(0, 0, 0, 0));
2521 const __m128i vidistx = _mm_shufflelo_epi16(_mm_cvtsi32_si128(0x10000 - distx), _MM_SHUFFLE(0, 0, 0, 0));
2522 vt = _mm_mulhi_epu16(a: vt, b: _mm_unpacklo_epi64(a: vidistx, b: vdistx));
2523 vt = _mm_add_epi16(a: vt, _mm_srli_si128(vt, 8));
2524 }
2525 _mm_storel_epi64(p: (__m128i*)(b+i), a: vt);
2526#else
2527 b[i] = interpolate_4_pixels_rgb64(buf1 + i*2, buf2 + i*2, distx, disty);
2528#endif
2529 fx += fdx;
2530 }
2531 length -= len;
2532 b += len;
2533 }
2534 } else { // rotation or shear
2535 while (length) {
2536 const int len = qMin(a: length, b: BufferSize / 2);
2537
2538 fetcher(sbuf1, sbuf2, len, data->texture, fx, fy, fdx, fdy);
2539
2540 convert(buf1, sbuf1, len * 2, clut, nullptr);
2541 convert(buf2, sbuf2, len * 2, clut, nullptr);
2542
2543 for (int i = 0; i < len; ++i) {
2544 const int distx = (fx & 0x0000ffff);
2545 const int disty = (fy & 0x0000ffff);
2546 b[i] = interpolate_4_pixels_rgb64(t: buf1 + i*2, b: buf2 + i*2, distx, disty);
2547 fx += fdx;
2548 fy += fdy;
2549 }
2550
2551 length -= len;
2552 b += len;
2553 }
2554 }
2555 } else { // !(data->fast_matrix)
2556 const auto fetcher =
2557 (layout->bpp == QPixelLayout::BPP32)
2558 ? fetchTransformedBilinear_slow_fetcher<blendType, QPixelLayout::BPP32, uint>
2559 : fetchTransformedBilinear_slow_fetcher<blendType, QPixelLayout::BPPNone, uint>;
2560
2561 const qreal fdx = data->m11;
2562 const qreal fdy = data->m12;
2563 const qreal fdw = data->m13;
2564
2565 qreal fx = data->m21 * cy + data->m11 * cx + data->dx;
2566 qreal fy = data->m22 * cy + data->m12 * cx + data->dy;
2567 qreal fw = data->m23 * cy + data->m13 * cx + data->m33;
2568
2569 ushort distxs[BufferSize / 2];
2570 ushort distys[BufferSize / 2];
2571
2572 while (length) {
2573 const int len = qMin(a: length, b: BufferSize / 2);
2574 fetcher(sbuf1, sbuf2, distxs, distys, len, data->texture, fx, fy, fw, fdx, fdy, fdw);
2575
2576 convert(buf1, sbuf1, len * 2, clut, nullptr);
2577 convert(buf2, sbuf2, len * 2, clut, nullptr);
2578
2579 for (int i = 0; i < len; ++i) {
2580 const int distx = distxs[i];
2581 const int disty = distys[i];
2582 b[i] = interpolate_4_pixels_rgb64(t: buf1 + i*2, b: buf2 + i*2, distx, disty);
2583 }
2584
2585 length -= len;
2586 b += len;
2587 }
2588 }
2589 return buffer;
2590}
2591
2592template<TextureBlendType blendType>
2593static const QRgba64 *QT_FASTCALL fetchTransformedBilinear64_uint64(QRgba64 *buffer, const QSpanData *data,
2594 int y, int x, int length)
2595{
2596 const auto convert = convert64ToRGBA64PM[data->texture.format];
2597
2598 const qreal cx = x + qreal(0.5);
2599 const qreal cy = y + qreal(0.5);
2600
2601 alignas(8) QRgba64 buf1[BufferSize];
2602 alignas(8) QRgba64 buf2[BufferSize];
2603 QRgba64 *end = buffer + length;
2604 QRgba64 *b = buffer;
2605
2606 if (canUseFastMatrixPath(cx, cy, length, data)) {
2607 // The increment pr x in the scanline
2608 const int fdx = (int)(data->m11 * fixed_scale);
2609 const int fdy = (int)(data->m12 * fixed_scale);
2610
2611 int fx = int((data->m21 * cy + data->m11 * cx + data->dx) * fixed_scale);
2612 int fy = int((data->m22 * cy + data->m12 * cx + data->dy) * fixed_scale);
2613
2614 fx -= half_point;
2615 fy -= half_point;
2616 const auto fetcher = fetchTransformedBilinear_fetcher<blendType, QPixelLayout::BPP64, QRgba64>;
2617
2618 if (fdy == 0) { //simple scale, no rotation
2619 while (length) {
2620 int len = qMin(a: length, b: BufferSize / 2);
2621 int disty = (fy & 0x0000ffff);
2622#if defined(__SSE2__)
2623 const __m128i vdy = _mm_set1_epi16(w: disty);
2624 const __m128i vidy = _mm_set1_epi16(w: 0x10000 - disty);
2625#endif
2626 fetcher(buf1, buf2, len, data->texture, fx, fy, fdx, fdy);
2627
2628 convert(buf1, len * 2);
2629 if (disty)
2630 convert(buf2, len * 2);
2631
2632 for (int i = 0; i < len; ++i) {
2633 int distx = (fx & 0x0000ffff);
2634#if defined(__SSE2__)
2635 __m128i vt = _mm_loadu_si128(p: (const __m128i*)(buf1 + i*2));
2636 if (disty) {
2637 __m128i vb = _mm_loadu_si128(p: (const __m128i*)(buf2 + i*2));
2638 vt = _mm_mulhi_epu16(a: vt, b: vidy);
2639 vb = _mm_mulhi_epu16(a: vb, b: vdy);
2640 vt = _mm_add_epi16(a: vt, b: vb);
2641 }
2642 if (distx) {
2643 const __m128i vdistx = _mm_shufflelo_epi16(_mm_cvtsi32_si128(distx), _MM_SHUFFLE(0, 0, 0, 0));
2644 const __m128i vidistx = _mm_shufflelo_epi16(_mm_cvtsi32_si128(0x10000 - distx), _MM_SHUFFLE(0, 0, 0, 0));
2645 vt = _mm_mulhi_epu16(a: vt, b: _mm_unpacklo_epi64(a: vidistx, b: vdistx));
2646 vt = _mm_add_epi16(a: vt, _mm_srli_si128(vt, 8));
2647 }
2648 _mm_storel_epi64(p: (__m128i*)(b+i), a: vt);
2649#else
2650 b[i] = interpolate_4_pixels_rgb64(buf1 + i*2, buf2 + i*2, distx, disty);
2651#endif
2652 fx += fdx;
2653 }
2654 length -= len;
2655 b += len;
2656 }
2657 } else { // rotation or shear
2658 while (b < end) {
2659 int len = qMin(a: length, b: BufferSize / 2);
2660
2661 fetcher(buf1, buf2, len, data->texture, fx, fy, fdx, fdy);
2662
2663 convert(buf1, len * 2);
2664 convert(buf2, len * 2);
2665
2666 for (int i = 0; i < len; ++i) {
2667 int distx = (fx & 0x0000ffff);
2668 int disty = (fy & 0x0000ffff);
2669 b[i] = interpolate_4_pixels_rgb64(t: buf1 + i*2, b: buf2 + i*2, distx, disty);
2670 fx += fdx;
2671 fy += fdy;
2672 }
2673
2674 length -= len;
2675 b += len;
2676 }
2677 }
2678 } else { // !(data->fast_matrix)
2679 const auto fetcher = fetchTransformedBilinear_slow_fetcher<blendType, QPixelLayout::BPP64, QRgba64>;
2680
2681 const qreal fdx = data->m11;
2682 const qreal fdy = data->m12;
2683 const qreal fdw = data->m13;
2684
2685 qreal fx = data->m21 * cy + data->m11 * cx + data->dx;
2686 qreal fy = data->m22 * cy + data->m12 * cx + data->dy;
2687 qreal fw = data->m23 * cy + data->m13 * cx + data->m33;
2688
2689 ushort distxs[BufferSize / 2];
2690 ushort distys[BufferSize / 2];
2691
2692 while (length) {
2693 const int len = qMin(a: length, b: BufferSize / 2);
2694 fetcher(buf1, buf2, distxs, distys, len, data->texture, fx, fy, fw, fdx, fdy, fdw);
2695
2696 convert(buf1, len * 2);
2697 convert(buf2, len * 2);
2698
2699 for (int i = 0; i < len; ++i) {
2700 const int distx = distxs[i];
2701 const int disty = distys[i];
2702 b[i] = interpolate_4_pixels_rgb64(t: buf1 + i*2, b: buf2 + i*2, distx, disty);
2703 }
2704
2705 length -= len;
2706 b += len;
2707 }
2708 }
2709 return buffer;
2710}
2711
2712template<TextureBlendType blendType>
2713static const QRgba64 *QT_FASTCALL fetchTransformedBilinear64_f32x4(QRgba64 *buffer, const QSpanData *data,
2714 int y, int x, int length)
2715{
2716 const QPixelLayout *layout = &qPixelLayouts[data->texture.format];
2717 const auto *clut = data->texture.colorTable;
2718 const auto convert = layout->fetchToRGBA64PM;
2719
2720 const qreal cx = x + qreal(0.5);
2721 const qreal cy = y + qreal(0.5);
2722
2723 QRgbaFloat32 sbuf1[BufferSize];
2724 QRgbaFloat32 sbuf2[BufferSize];
2725 alignas(8) QRgba64 buf1[BufferSize];
2726 alignas(8) QRgba64 buf2[BufferSize];
2727 QRgba64 *b = buffer;
2728
2729 if (canUseFastMatrixPath(cx, cy, length, data)) {
2730 // The increment pr x in the scanline
2731 const int fdx = (int)(data->m11 * fixed_scale);
2732 const int fdy = (int)(data->m12 * fixed_scale);
2733
2734 int fx = int((data->m21 * cy + data->m11 * cx + data->dx) * fixed_scale);
2735 int fy = int((data->m22 * cy + data->m12 * cx + data->dy) * fixed_scale);
2736
2737 fx -= half_point;
2738 fy -= half_point;
2739
2740 const auto fetcher = fetchTransformedBilinear_fetcher<blendType, QPixelLayout::BPP32FPx4, QRgbaFloat32>;
2741
2742 const bool skipsecond = (fdy == 0) && ((fy & 0x0000ffff) == 0);
2743 while (length) {
2744 const int len = qMin(a: length, b: BufferSize / 2);
2745
2746 fetcher(sbuf1, sbuf2, len, data->texture, fx, fy, fdx, fdy);
2747
2748 convert(buf1, (const uchar *)sbuf1, 0, len * 2, clut, nullptr);
2749 if (!skipsecond)
2750 convert(buf2, (const uchar *)sbuf2, 0, len * 2, clut, nullptr);
2751
2752 for (int i = 0; i < len; ++i) {
2753 const int distx = (fx & 0x0000ffff);
2754 const int disty = (fy & 0x0000ffff);
2755 b[i] = interpolate_4_pixels_rgb64(t: buf1 + i*2, b: buf2 + i*2, distx, disty);
2756 fx += fdx;
2757 fy += fdy;
2758 }
2759
2760 length -= len;
2761 b += len;
2762 }
2763 } else { // !(data->fast_matrix)
2764 const auto fetcher = fetchTransformedBilinear_slow_fetcher<blendType, QPixelLayout::BPP32FPx4, QRgbaFloat32>;
2765
2766 const qreal fdx = data->m11;
2767 const qreal fdy = data->m12;
2768 const qreal fdw = data->m13;
2769
2770 qreal fx = data->m21 * cy + data->m11 * cx + data->dx;
2771 qreal fy = data->m22 * cy + data->m12 * cx + data->dy;
2772 qreal fw = data->m23 * cy + data->m13 * cx + data->m33;
2773
2774 ushort distxs[BufferSize / 2];
2775 ushort distys[BufferSize / 2];
2776
2777 while (length) {
2778 const int len = qMin(a: length, b: BufferSize / 2);
2779 fetcher(sbuf1, sbuf2, distxs, distys, len, data->texture, fx, fy, fw, fdx, fdy, fdw);
2780
2781 convert(buf1, (const uchar *)sbuf1, 0, len * 2, clut, nullptr);
2782 convert(buf2, (const uchar *)sbuf2, 0, len * 2, clut, nullptr);
2783
2784 for (int i = 0; i < len; ++i) {
2785 const int distx = distxs[i];
2786 const int disty = distys[i];
2787 b[i] = interpolate_4_pixels_rgb64(t: buf1 + i*2, b: buf2 + i*2, distx, disty);
2788 }
2789
2790 length -= len;
2791 b += len;
2792 }
2793 }
2794 return buffer;
2795}
2796
2797template<TextureBlendType blendType>
2798static const QRgba64 *QT_FASTCALL fetchTransformedBilinear64(QRgba64 *buffer, const Operator *,
2799 const QSpanData *data, int y, int x, int length)
2800{
2801 switch (qPixelLayouts[data->texture.format].bpp) {
2802 case QPixelLayout::BPP64:
2803 case QPixelLayout::BPP16FPx4:
2804 return fetchTransformedBilinear64_uint64<blendType>(buffer, data, y, x, length);
2805 case QPixelLayout::BPP32FPx4:
2806 return fetchTransformedBilinear64_f32x4<blendType>(buffer, data, y, x, length);
2807 default:
2808 return fetchTransformedBilinear64_uint32<blendType>(buffer, data, y, x, length);
2809 }
2810}
2811#endif
2812
2813#if QT_CONFIG(raster_fp)
2814static void interpolate_simple_rgba32f(QRgbaFloat32 *b, const QRgbaFloat32 *buf1, const QRgbaFloat32 *buf2, int len,
2815 int &fx, int fdx,
2816 int &fy, int fdy)
2817{
2818 for (int i = 0; i < len; ++i) {
2819 const int distx = (fx & 0x0000ffff);
2820 const int disty = (fy & 0x0000ffff);
2821 b[i] = interpolate_4_pixels_rgba32f(t: buf1 + i*2, b: buf2 + i*2, distx, disty);
2822 fx += fdx;
2823 fy += fdy;
2824 }
2825}
2826
2827static void interpolate_perspective_rgba32f(QRgbaFloat32 *b, const QRgbaFloat32 *buf1, const QRgbaFloat32 *buf2, int len,
2828 unsigned short *distxs,
2829 unsigned short *distys)
2830{
2831 for (int i = 0; i < len; ++i) {
2832 const int dx = distxs[i];
2833 const int dy = distys[i];
2834 b[i] = interpolate_4_pixels_rgba32f(t: buf1 + i*2, b: buf2 + i*2, distx: dx, disty: dy);
2835 }
2836}
2837
2838template<TextureBlendType blendType>
2839static const QRgbaFloat32 *QT_FASTCALL fetchTransformedBilinearFP_uint32(QRgbaFloat32 *buffer, const QSpanData *data,
2840 int y, int x, int length)
2841{
2842 const QPixelLayout *layout = &qPixelLayouts[data->texture.format];
2843 const auto *clut = data->texture.colorTable;
2844 const auto convert = qConvertToRGBA32F[data->texture.format];
2845
2846 const qreal cx = x + qreal(0.5);
2847 const qreal cy = y + qreal(0.5);
2848
2849 uint sbuf1[BufferSize];
2850 uint sbuf2[BufferSize];
2851 QRgbaFloat32 buf1[BufferSize];
2852 QRgbaFloat32 buf2[BufferSize];
2853 QRgbaFloat32 *b = buffer;
2854
2855 if (canUseFastMatrixPath(cx, cy, length, data)) {
2856 // The increment pr x in the scanline
2857 const int fdx = (int)(data->m11 * fixed_scale);
2858 const int fdy = (int)(data->m12 * fixed_scale);
2859
2860 int fx = int((data->m21 * cy + data->m11 * cx + data->dx) * fixed_scale);
2861 int fy = int((data->m22 * cy + data->m12 * cx + data->dy) * fixed_scale);
2862
2863 fx -= half_point;
2864 fy -= half_point;
2865
2866 const auto fetcher =
2867 (layout->bpp == QPixelLayout::BPP32)
2868 ? fetchTransformedBilinear_fetcher<blendType, QPixelLayout::BPP32, uint>
2869 : fetchTransformedBilinear_fetcher<blendType, QPixelLayout::BPPNone, uint>;
2870
2871 const bool skipsecond = (fdy == 0) && ((fy & 0x0000ffff) == 0);
2872 while (length) {
2873 const int len = qMin(a: length, b: BufferSize / 2);
2874 fetcher(sbuf1, sbuf2, len, data->texture, fx, fy, fdx, fdy);
2875
2876 convert(buf1, sbuf1, len * 2, clut, nullptr);
2877 if (!skipsecond)
2878 convert(buf2, sbuf2, len * 2, clut, nullptr);
2879
2880 interpolate_simple_rgba32f(b, buf1, buf2, len, fx, fdx, fy, fdy);
2881
2882 length -= len;
2883 b += len;
2884 }
2885 } else { // !(data->fast_matrix)
2886 const auto fetcher =
2887 (layout->bpp == QPixelLayout::BPP32)
2888 ? fetchTransformedBilinear_slow_fetcher<blendType, QPixelLayout::BPP32, uint>
2889 : fetchTransformedBilinear_slow_fetcher<blendType, QPixelLayout::BPPNone, uint>;
2890
2891 const qreal fdx = data->m11;
2892 const qreal fdy = data->m12;
2893 const qreal fdw = data->m13;
2894 qreal fx = data->m21 * cy + data->m11 * cx + data->dx;
2895 qreal fy = data->m22 * cy + data->m12 * cx + data->dy;
2896 qreal fw = data->m23 * cy + data->m13 * cx + data->m33;
2897 ushort distxs[BufferSize / 2];
2898 ushort distys[BufferSize / 2];
2899
2900 while (length) {
2901 const int len = qMin(a: length, b: BufferSize / 2);
2902 fetcher(sbuf1, sbuf2, distxs, distys, len, data->texture, fx, fy, fw, fdx, fdy, fdw);
2903
2904 convert(buf1, sbuf1, len * 2, clut, nullptr);
2905 convert(buf2, sbuf2, len * 2, clut, nullptr);
2906
2907 interpolate_perspective_rgba32f(b, buf1, buf2, len, distxs, distys);
2908
2909 length -= len;
2910 b += len;
2911 }
2912 }
2913 return buffer;
2914}
2915
2916template<TextureBlendType blendType>
2917static const QRgbaFloat32 *QT_FASTCALL fetchTransformedBilinearFP_uint64(QRgbaFloat32 *buffer, const QSpanData *data,
2918 int y, int x, int length)
2919{
2920 const auto convert = convert64ToRGBA32F[data->texture.format];
2921
2922 const qreal cx = x + qreal(0.5);
2923 const qreal cy = y + qreal(0.5);
2924
2925 quint64 sbuf1[BufferSize];
2926 quint64 sbuf2[BufferSize];
2927 QRgbaFloat32 buf1[BufferSize];
2928 QRgbaFloat32 buf2[BufferSize];
2929 QRgbaFloat32 *b = buffer;
2930
2931 if (canUseFastMatrixPath(cx, cy, length, data)) {
2932 // The increment pr x in the scanline
2933 const int fdx = (int)(data->m11 * fixed_scale);
2934 const int fdy = (int)(data->m12 * fixed_scale);
2935
2936 int fx = int((data->m21 * cy + data->m11 * cx + data->dx) * fixed_scale);
2937 int fy = int((data->m22 * cy + data->m12 * cx + data->dy) * fixed_scale);
2938
2939 fx -= half_point;
2940 fy -= half_point;
2941 const auto fetcher = fetchTransformedBilinear_fetcher<blendType, QPixelLayout::BPP64, quint64>;
2942
2943 const bool skipsecond = (fdy == 0) && ((fy & 0x0000ffff) == 0);
2944 while (length) {
2945 const int len = qMin(a: length, b: BufferSize / 2);
2946 fetcher(sbuf1, sbuf2, len, data->texture, fx, fy, fdx, fdy);
2947
2948 convert(buf1, sbuf1, len * 2);
2949 if (!skipsecond)
2950 convert(buf2, sbuf2, len * 2);
2951
2952 interpolate_simple_rgba32f(b, buf1, buf2, len, fx, fdx, fy, fdy);
2953
2954 length -= len;
2955 b += len;
2956 }
2957 } else { // !(data->fast_matrix)
2958 const auto fetcher = fetchTransformedBilinear_slow_fetcher<blendType, QPixelLayout::BPP64, quint64>;
2959
2960 const qreal fdx = data->m11;
2961 const qreal fdy = data->m12;
2962 const qreal fdw = data->m13;
2963
2964 qreal fx = data->m21 * cy + data->m11 * cx + data->dx;
2965 qreal fy = data->m22 * cy + data->m12 * cx + data->dy;
2966 qreal fw = data->m23 * cy + data->m13 * cx + data->m33;
2967
2968 ushort distxs[BufferSize / 2];
2969 ushort distys[BufferSize / 2];
2970
2971 while (length) {
2972 const int len = qMin(a: length, b: BufferSize / 2);
2973 fetcher(sbuf1, sbuf2, distxs, distys, len, data->texture, fx, fy, fw, fdx, fdy, fdw);
2974
2975 convert(buf1, sbuf1, len * 2);
2976 convert(buf2, sbuf2, len * 2);
2977
2978 interpolate_perspective_rgba32f(b, buf1, buf2, len, distxs, distys);
2979
2980 length -= len;
2981 b += len;
2982 }
2983 }
2984 return buffer;
2985}
2986
2987template<TextureBlendType blendType>
2988static const QRgbaFloat32 *QT_FASTCALL fetchTransformedBilinearFP(QRgbaFloat32 *buffer, const QSpanData *data,
2989 int y, int x, int length)
2990{
2991 const auto convert = data->rasterBuffer->format == QImage::Format_RGBA32FPx4 ? convertRGBA32FToRGBA32FPM
2992 : convertRGBA32FToRGBA32F;
2993
2994 const qreal cx = x + qreal(0.5);
2995 const qreal cy = y + qreal(0.5);
2996
2997 QRgbaFloat32 buf1[BufferSize];
2998 QRgbaFloat32 buf2[BufferSize];
2999 QRgbaFloat32 *b = buffer;
3000
3001 if (canUseFastMatrixPath(cx, cy, length, data)) {
3002 // The increment pr x in the scanline
3003 const int fdx = (int)(data->m11 * fixed_scale);
3004 const int fdy = (int)(data->m12 * fixed_scale);
3005
3006 int fx = int((data->m21 * cy + data->m11 * cx + data->dx) * fixed_scale);
3007 int fy = int((data->m22 * cy + data->m12 * cx + data->dy) * fixed_scale);
3008
3009 fx -= half_point;
3010 fy -= half_point;
3011 const auto fetcher = fetchTransformedBilinear_fetcher<blendType, QPixelLayout::BPP32FPx4, QRgbaFloat32>;
3012
3013 const bool skipsecond = (fdy == 0) && ((fy & 0x0000ffff) == 0);
3014 while (length) {
3015 const int len = qMin(a: length, b: BufferSize / 2);
3016 fetcher(buf1, buf2, len, data->texture, fx, fy, fdx, fdy);
3017
3018 convert(buf1, len * 2);
3019 if (!skipsecond)
3020 convert(buf2, len * 2);
3021
3022 interpolate_simple_rgba32f(b, buf1, buf2, len, fx, fdx, fy, fdy);
3023
3024 length -= len;
3025 b += len;
3026 }
3027 } else { // !(data->fast_matrix)
3028 const auto fetcher = fetchTransformedBilinear_slow_fetcher<blendType, QPixelLayout::BPP32FPx4, QRgbaFloat32>;
3029
3030 const qreal fdx = data->m11;
3031 const qreal fdy = data->m12;
3032 const qreal fdw = data->m13;
3033
3034 qreal fx = data->m21 * cy + data->m11 * cx + data->dx;
3035 qreal fy = data->m22 * cy + data->m12 * cx + data->dy;
3036 qreal fw = data->m23 * cy + data->m13 * cx + data->m33;
3037
3038 ushort distxs[BufferSize / 2];
3039 ushort distys[BufferSize / 2];
3040
3041 while (length) {
3042 const int len = qMin(a: length, b: BufferSize / 2);
3043 fetcher(buf1, buf2, distxs, distys, len, data->texture, fx, fy, fw, fdx, fdy, fdw);
3044
3045 convert(buf1, len * 2);
3046 convert(buf2, len * 2);
3047
3048 interpolate_perspective_rgba32f(b, buf1, buf2, len, distxs, distys);
3049
3050 length -= len;
3051 b += len;
3052 }
3053 }
3054 return buffer;
3055}
3056
3057template<TextureBlendType blendType>
3058static const QRgbaFloat32 *QT_FASTCALL fetchTransformedBilinearFP(QRgbaFloat32 *buffer, const Operator *,
3059 const QSpanData *data, int y, int x, int length)
3060{
3061 switch (qPixelLayouts[data->texture.format].bpp) {
3062 case QPixelLayout::BPP64:
3063 case QPixelLayout::BPP16FPx4:
3064 return fetchTransformedBilinearFP_uint64<blendType>(buffer, data, y, x, length);
3065 case QPixelLayout::BPP32FPx4:
3066 return fetchTransformedBilinearFP<blendType>(buffer, data, y, x, length);
3067 default:
3068 return fetchTransformedBilinearFP_uint32<blendType>(buffer, data, y, x, length);
3069 }
3070}
3071#endif // QT_CONFIG(raster_fp)
3072
3073// FetchUntransformed can have more specialized methods added depending on SIMD features.
3074static SourceFetchProc sourceFetchUntransformed[QImage::NImageFormats] = {
3075 nullptr, // Invalid
3076 fetchUntransformed, // Mono
3077 fetchUntransformed, // MonoLsb
3078 fetchUntransformed, // Indexed8
3079 fetchUntransformedARGB32PM, // RGB32
3080 fetchUntransformed, // ARGB32
3081 fetchUntransformedARGB32PM, // ARGB32_Premultiplied
3082 fetchUntransformedRGB16, // RGB16
3083 fetchUntransformed, // ARGB8565_Premultiplied
3084 fetchUntransformed, // RGB666
3085 fetchUntransformed, // ARGB6666_Premultiplied
3086 fetchUntransformed, // RGB555
3087 fetchUntransformed, // ARGB8555_Premultiplied
3088 fetchUntransformed, // RGB888
3089 fetchUntransformed, // RGB444
3090 fetchUntransformed, // ARGB4444_Premultiplied
3091 fetchUntransformed, // RGBX8888
3092 fetchUntransformed, // RGBA8888
3093 fetchUntransformed, // RGBA8888_Premultiplied
3094 fetchUntransformed, // Format_BGR30
3095 fetchUntransformed, // Format_A2BGR30_Premultiplied
3096 fetchUntransformed, // Format_RGB30
3097 fetchUntransformed, // Format_A2RGB30_Premultiplied
3098 fetchUntransformed, // Alpha8
3099 fetchUntransformed, // Grayscale8
3100 fetchUntransformed, // RGBX64
3101 fetchUntransformed, // RGBA64
3102 fetchUntransformed, // RGBA64_Premultiplied
3103 fetchUntransformed, // Grayscale16
3104 fetchUntransformed, // BGR888
3105 fetchUntransformed, // RGBX16FPx4
3106 fetchUntransformed, // RGBA16FPx4
3107 fetchUntransformed, // RGBA16FPx4_Premultiplied
3108 fetchUntransformed, // RGBX32Px4
3109 fetchUntransformed, // RGBA32FPx4
3110 fetchUntransformed, // RGBA32FPx4_Premultiplied
3111};
3112
3113static const SourceFetchProc sourceFetchGeneric[NBlendTypes] = {
3114 fetchUntransformed, // Untransformed
3115 fetchUntransformed, // Tiled
3116 fetchTransformed<BlendTransformed, QPixelLayout::BPPNone>, // Transformed
3117 fetchTransformed<BlendTransformedTiled, QPixelLayout::BPPNone>, // TransformedTiled
3118 fetchTransformedBilinear<BlendTransformedBilinear, QPixelLayout::BPPNone>, // TransformedBilinear
3119 fetchTransformedBilinear<BlendTransformedBilinearTiled, QPixelLayout::BPPNone> // TransformedBilinearTiled
3120};
3121
3122static SourceFetchProc sourceFetchARGB32PM[NBlendTypes] = {
3123 fetchUntransformedARGB32PM, // Untransformed
3124 fetchUntransformedARGB32PM, // Tiled
3125 fetchTransformed<BlendTransformed, QPixelLayout::BPP32>, // Transformed
3126 fetchTransformed<BlendTransformedTiled, QPixelLayout::BPP32>, // TransformedTiled
3127 fetchTransformedBilinearARGB32PM<BlendTransformedBilinear>, // Bilinear
3128 fetchTransformedBilinearARGB32PM<BlendTransformedBilinearTiled> // BilinearTiled
3129};
3130
3131static SourceFetchProc sourceFetchAny16[NBlendTypes] = {
3132 fetchUntransformed, // Untransformed
3133 fetchUntransformed, // Tiled
3134 fetchTransformed<BlendTransformed, QPixelLayout::BPP16>, // Transformed
3135 fetchTransformed<BlendTransformedTiled, QPixelLayout::BPP16>, // TransformedTiled
3136 fetchTransformedBilinear<BlendTransformedBilinear, QPixelLayout::BPP16>, // TransformedBilinear
3137 fetchTransformedBilinear<BlendTransformedBilinearTiled, QPixelLayout::BPP16> // TransformedBilinearTiled
3138};
3139
3140static SourceFetchProc sourceFetchAny32[NBlendTypes] = {
3141 fetchUntransformed, // Untransformed
3142 fetchUntransformed, // Tiled
3143 fetchTransformed<BlendTransformed, QPixelLayout::BPP32>, // Transformed
3144 fetchTransformed<BlendTransformedTiled, QPixelLayout::BPP32>, // TransformedTiled
3145 fetchTransformedBilinear<BlendTransformedBilinear, QPixelLayout::BPP32>, // TransformedBilinear
3146 fetchTransformedBilinear<BlendTransformedBilinearTiled, QPixelLayout::BPP32> // TransformedBilinearTiled
3147};
3148
3149static inline SourceFetchProc getSourceFetch(TextureBlendType blendType, QImage::Format format)
3150{
3151 if (format == QImage::Format_RGB32 || format == QImage::Format_ARGB32_Premultiplied)
3152 return sourceFetchARGB32PM[blendType];
3153 if (blendType == BlendUntransformed || blendType == BlendTiled)
3154 return sourceFetchUntransformed[format];
3155 if (qPixelLayouts[format].bpp == QPixelLayout::BPP16)
3156 return sourceFetchAny16[blendType];
3157 if (qPixelLayouts[format].bpp == QPixelLayout::BPP32)
3158 return sourceFetchAny32[blendType];
3159 return sourceFetchGeneric[blendType];
3160}
3161
3162#if QT_CONFIG(raster_64bit)
3163static const SourceFetchProc64 sourceFetchGeneric64[NBlendTypes] = {
3164 fetchUntransformed64, // Untransformed
3165 fetchUntransformed64, // Tiled
3166 fetchTransformed64<BlendTransformed>, // Transformed
3167 fetchTransformed64<BlendTransformedTiled>, // TransformedTiled
3168 fetchTransformedBilinear64<BlendTransformedBilinear>, // Bilinear
3169 fetchTransformedBilinear64<BlendTransformedBilinearTiled> // BilinearTiled
3170};
3171
3172static const SourceFetchProc64 sourceFetchRGBA64PM[NBlendTypes] = {
3173 fetchUntransformedRGBA64PM, // Untransformed
3174 fetchUntransformedRGBA64PM, // Tiled
3175 fetchTransformed64<BlendTransformed>, // Transformed
3176 fetchTransformed64<BlendTransformedTiled>, // TransformedTiled
3177 fetchTransformedBilinear64<BlendTransformedBilinear>, // Bilinear
3178 fetchTransformedBilinear64<BlendTransformedBilinearTiled> // BilinearTiled
3179};
3180
3181static inline SourceFetchProc64 getSourceFetch64(TextureBlendType blendType, QImage::Format format)
3182{
3183 if (format == QImage::Format_RGBX64 || format == QImage::Format_RGBA64_Premultiplied)
3184 return sourceFetchRGBA64PM[blendType];
3185 return sourceFetchGeneric64[blendType];
3186}
3187#endif
3188
3189#if QT_CONFIG(raster_fp)
3190static const SourceFetchProcFP sourceFetchGenericFP[NBlendTypes] = {
3191 fetchUntransformedFP, // Untransformed
3192 fetchUntransformedFP, // Tiled
3193 fetchTransformedFP<BlendTransformed>, // Transformed
3194 fetchTransformedFP<BlendTransformedTiled>, // TransformedTiled
3195 fetchTransformedBilinearFP<BlendTransformedBilinear>, // Bilinear
3196 fetchTransformedBilinearFP<BlendTransformedBilinearTiled> // BilinearTiled
3197};
3198
3199static inline SourceFetchProcFP getSourceFetchFP(TextureBlendType blendType, QImage::Format /*format*/)
3200{
3201 return sourceFetchGenericFP[blendType];
3202}
3203#endif
3204
// Fixed-point format used for gradient table positions.
// FIXPT_BITS: number of fractional bits.
#define FIXPT_BITS 8
// FIXPT_SIZE: the value 1.0 in this fixed-point format.
#define FIXPT_SIZE (1 << FIXPT_BITS)
// FIXPT_MAX: magnitude bound checked before a position is converted to fixed point.
#define FIXPT_MAX (INT_MAX >> (FIXPT_BITS + 1))
3208
3209static uint qt_gradient_pixel_fixed(const QGradientData *data, int fixed_pos)
3210{
3211 int ipos = (fixed_pos + (FIXPT_SIZE / 2)) >> FIXPT_BITS;
3212 return data->colorTable32[qt_gradient_clamp(data, ipos)];
3213}
3214
3215#if QT_CONFIG(raster_64bit)
3216static const QRgba64& qt_gradient_pixel64_fixed(const QGradientData *data, int fixed_pos)
3217{
3218 int ipos = (fixed_pos + (FIXPT_SIZE / 2)) >> FIXPT_BITS;
3219 return data->colorTable64[qt_gradient_clamp(data, ipos)];
3220}
3221#endif
3222
3223#if QT_CONFIG(raster_fp)
3224static inline QRgbaFloat32 qt_gradient_pixelFP(const QGradientData *data, qreal pos)
3225{
3226 int ipos = int(pos * (GRADIENT_STOPTABLE_SIZE - 1) + qreal(0.5));
3227 QRgba64 rgb64 = data->colorTable64[qt_gradient_clamp(data, ipos)];
3228 return QRgbaFloat32::fromRgba64(red: rgb64.red(),green: rgb64.green(), blue: rgb64.blue(), alpha: rgb64.alpha());
3229}
3230
3231static inline QRgbaFloat32 qt_gradient_pixelFP_fixed(const QGradientData *data, int fixed_pos)
3232{
3233 int ipos = (fixed_pos + (FIXPT_SIZE / 2)) >> FIXPT_BITS;
3234 QRgba64 rgb64 = data->colorTable64[qt_gradient_clamp(data, ipos)];
3235 return QRgbaFloat32::fromRgba64(red: rgb64.red(), green: rgb64.green(), blue: rgb64.blue(), alpha: rgb64.alpha());
3236}
3237#endif
3238
3239static void QT_FASTCALL getLinearGradientValues(LinearGradientValues *v, const QSpanData *data)
3240{
3241 v->dx = data->gradient.linear.end.x - data->gradient.linear.origin.x;
3242 v->dy = data->gradient.linear.end.y - data->gradient.linear.origin.y;
3243 v->l = v->dx * v->dx + v->dy * v->dy;
3244 v->off = 0;
3245 if (v->l != 0) {
3246 v->dx /= v->l;
3247 v->dy /= v->l;
3248 v->off = -v->dx * data->gradient.linear.origin.x - v->dy * data->gradient.linear.origin.y;
3249 }
3250}
3251
3252class GradientBase32
3253{
3254public:
3255 typedef uint Type;
3256 static Type null() { return 0; }
3257 static Type fetchSingle(const QGradientData& gradient, qreal v)
3258 {
3259 return qt_gradient_pixel(data: &gradient, pos: v);
3260 }
3261 static Type fetchSingle(const QGradientData& gradient, int v)
3262 {
3263 return qt_gradient_pixel_fixed(data: &gradient, fixed_pos: v);
3264 }
3265 static void memfill(Type *buffer, Type fill, int length)
3266 {
3267 qt_memfill32(buffer, fill, length);
3268 }
3269};
3270
3271#if QT_CONFIG(raster_64bit)
3272class GradientBase64
3273{
3274public:
3275 typedef QRgba64 Type;
3276 static Type null() { return QRgba64::fromRgba64(c: 0); }
3277 static Type fetchSingle(const QGradientData& gradient, qreal v)
3278 {
3279 return qt_gradient_pixel64(data: &gradient, pos: v);
3280 }
3281 static Type fetchSingle(const QGradientData& gradient, int v)
3282 {
3283 return qt_gradient_pixel64_fixed(data: &gradient, fixed_pos: v);
3284 }
3285 static void memfill(Type *buffer, Type fill, int length)
3286 {
3287 qt_memfill64((quint64*)buffer, fill, length);
3288 }
3289};
3290#endif
3291
3292#if QT_CONFIG(raster_fp)
3293class GradientBaseFP
3294{
3295public:
3296 typedef QRgbaFloat32 Type;
3297 static Type null() { return QRgbaFloat32::fromRgba64(red: 0,green: 0,blue: 0,alpha: 0); }
3298 static Type fetchSingle(const QGradientData& gradient, qreal v)
3299 {
3300 return qt_gradient_pixelFP(data: &gradient, pos: v);
3301 }
3302 static Type fetchSingle(const QGradientData& gradient, int v)
3303 {
3304 return qt_gradient_pixelFP_fixed(data: &gradient, fixed_pos: v);
3305 }
3306 static void memfill(Type *buffer, Type fill, int length)
3307 {
3308 quint64 fillCopy;
3309 memcpy(dest: &fillCopy, src: &fill, n: sizeof(quint64));
3310 qt_memfill64((quint64*)buffer, fillCopy, length);
3311 }
3312};
3313#endif
3314
3315template<class GradientBase, typename BlendType>
3316static inline const BlendType * QT_FASTCALL qt_fetch_linear_gradient_template(
3317 BlendType *buffer, const Operator *op, const QSpanData *data,
3318 int y, int x, int length)
3319{
3320 const BlendType *b = buffer;
3321 qreal t, inc;
3322
3323 bool affine = true;
3324 qreal rx=0, ry=0;
3325 if (op->linear.l == 0) {
3326 t = inc = 0;
3327 } else {
3328 rx = data->m21 * (y + qreal(0.5)) + data->m11 * (x + qreal(0.5)) + data->dx;
3329 ry = data->m22 * (y + qreal(0.5)) + data->m12 * (x + qreal(0.5)) + data->dy;
3330 t = op->linear.dx*rx + op->linear.dy*ry + op->linear.off;
3331 inc = op->linear.dx * data->m11 + op->linear.dy * data->m12;
3332 affine = !data->m13 && !data->m23;
3333
3334 if (affine) {
3335 t *= (GRADIENT_STOPTABLE_SIZE - 1);
3336 inc *= (GRADIENT_STOPTABLE_SIZE - 1);
3337 }
3338 }
3339
3340 const BlendType *end = buffer + length;
3341 if (affine) {
3342 if (inc > qreal(-1e-5) && inc < qreal(1e-5)) {
3343 if (std::abs(x: t) < FIXPT_MAX)
3344 GradientBase::memfill(buffer, GradientBase::fetchSingle(data->gradient, int(t * FIXPT_SIZE)), length);
3345 else
3346 GradientBase::memfill(buffer, GradientBase::fetchSingle(data->gradient, t / GRADIENT_STOPTABLE_SIZE), length);
3347 } else {
3348 if (std::abs(x: t) < FIXPT_MAX && std::abs(x: inc) < FIXPT_MAX && std::abs(x: t + inc * length) < FIXPT_MAX) {
3349 // we can use fixed point math
3350 int t_fixed = int(t * FIXPT_SIZE);
3351 int inc_fixed = int(inc * FIXPT_SIZE);
3352 while (buffer < end) {
3353 *buffer = GradientBase::fetchSingle(data->gradient, t_fixed);
3354 t_fixed += inc_fixed;
3355 ++buffer;
3356 }
3357 } else {
3358 // we have to fall back to float math
3359 while (buffer < end) {
3360 *buffer = GradientBase::fetchSingle(data->gradient, t/GRADIENT_STOPTABLE_SIZE);
3361 t += inc;
3362 ++buffer;
3363 }
3364 }
3365 }
3366 } else { // fall back to float math here as well
3367 qreal rw = data->m23 * (y + qreal(0.5)) + data->m13 * (x + qreal(0.5)) + data->m33;
3368 while (buffer < end) {
3369 qreal x = rx/rw;
3370 qreal y = ry/rw;
3371 t = (op->linear.dx*x + op->linear.dy *y) + op->linear.off;
3372
3373 *buffer = GradientBase::fetchSingle(data->gradient, t);
3374 rx += data->m11;
3375 ry += data->m12;
3376 rw += data->m13;
3377 if (!rw) {
3378 rw += data->m13;
3379 }
3380 ++buffer;
3381 }
3382 }
3383
3384 return b;
3385}
3386
3387static const uint * QT_FASTCALL qt_fetch_linear_gradient(uint *buffer, const Operator *op, const QSpanData *data,
3388 int y, int x, int length)
3389{
3390 return qt_fetch_linear_gradient_template<GradientBase32, uint>(buffer, op, data, y, x, length);
3391}
3392
3393#if QT_CONFIG(raster_64bit)
3394static const QRgba64 * QT_FASTCALL qt_fetch_linear_gradient_rgb64(QRgba64 *buffer, const Operator *op, const QSpanData *data,
3395 int y, int x, int length)
3396{
3397 return qt_fetch_linear_gradient_template<GradientBase64, QRgba64>(buffer, op, data, y, x, length);
3398}
3399#endif
3400#if QT_CONFIG(raster_fp)
3401static const QRgbaFloat32 * QT_FASTCALL qt_fetch_linear_gradient_rgbfp(QRgbaFloat32 *buffer, const Operator *op, const QSpanData *data,
3402 int y, int x, int length)
3403{
3404 return qt_fetch_linear_gradient_template<GradientBaseFP, QRgbaFloat32>(buffer, op, data, y, x, length);
3405}
3406#endif
3407
3408static void QT_FASTCALL getRadialGradientValues(RadialGradientValues *v, const QSpanData *data)
3409{
3410 v->dx = data->gradient.radial.center.x - data->gradient.radial.focal.x;
3411 v->dy = data->gradient.radial.center.y - data->gradient.radial.focal.y;
3412
3413 v->dr = data->gradient.radial.center.radius - data->gradient.radial.focal.radius;
3414 v->sqrfr = data->gradient.radial.focal.radius * data->gradient.radial.focal.radius;
3415
3416 v->a = v->dr * v->dr - v->dx*v->dx - v->dy*v->dy;
3417 v->inv2a = 1 / (2 * v->a);
3418
3419 v->extended = !qFuzzyIsNull(d: data->gradient.radial.focal.radius) || v->a <= 0;
3420}
3421
3422template <class GradientBase>
3423class RadialFetchPlain : public GradientBase
3424{
3425public:
3426 typedef typename GradientBase::Type BlendType;
3427 static void fetch(BlendType *buffer, BlendType *end,
3428 const Operator *op, const QSpanData *data, qreal det,
3429 qreal delta_det, qreal delta_delta_det, qreal b, qreal delta_b)
3430 {
3431 if (op->radial.extended) {
3432 while (buffer < end) {
3433 BlendType result = GradientBase::null();
3434 if (det >= 0) {
3435 qreal w = qSqrt(v: det) - b;
3436 if (data->gradient.radial.focal.radius + op->radial.dr * w >= 0)
3437 result = GradientBase::fetchSingle(data->gradient, w);
3438 }
3439
3440 *buffer = result;
3441
3442 det += delta_det;
3443 delta_det += delta_delta_det;
3444 b += delta_b;
3445
3446 ++buffer;
3447 }
3448 } else {
3449 while (buffer < end) {
3450 *buffer++ = GradientBase::fetchSingle(data->gradient, qSqrt(v: det) - b);
3451
3452 det += delta_det;
3453 delta_det += delta_delta_det;
3454 b += delta_b;
3455 }
3456 }
3457 }
3458};
3459
3460const uint * QT_FASTCALL qt_fetch_radial_gradient_plain(uint *buffer, const Operator *op, const QSpanData *data,
3461 int y, int x, int length)
3462{
3463 return qt_fetch_radial_gradient_template<RadialFetchPlain<GradientBase32>, uint>(buffer, op, data, y, x, length);
3464}
3465
// Function pointer that getOperator() installs as the 32-bit source fetch for
// radial gradients. It starts out pointing at the plain implementation.
// NOTE(review): it is non-const, presumably so an optimized variant can be
// assigned elsewhere in the file — confirm.
static SourceFetchProc qt_fetch_radial_gradient = qt_fetch_radial_gradient_plain;
3467
3468#if QT_CONFIG(raster_64bit)
3469const QRgba64 * QT_FASTCALL qt_fetch_radial_gradient_rgb64(QRgba64 *buffer, const Operator *op, const QSpanData *data,
3470 int y, int x, int length)
3471{
3472 return qt_fetch_radial_gradient_template<RadialFetchPlain<GradientBase64>, QRgba64>(buffer, op, data, y, x, length);
3473}
3474#endif
3475
3476#if QT_CONFIG(raster_fp)
3477static const QRgbaFloat32 * QT_FASTCALL qt_fetch_radial_gradient_rgbfp(QRgbaFloat32 *buffer, const Operator *op, const QSpanData *data,
3478 int y, int x, int length)
3479{
3480 return qt_fetch_radial_gradient_template<RadialFetchPlain<GradientBaseFP>, QRgbaFloat32>(buffer, op, data, y, x, length);
3481}
3482#endif
3483
3484template <class GradientBase, typename BlendType>
3485static inline const BlendType * QT_FASTCALL qt_fetch_conical_gradient_template(
3486 BlendType *buffer, const QSpanData *data,
3487 int y, int x, int length)
3488{
3489 const BlendType *b = buffer;
3490 qreal rx = data->m21 * (y + qreal(0.5))
3491 + data->dx + data->m11 * (x + qreal(0.5));
3492 qreal ry = data->m22 * (y + qreal(0.5))
3493 + data->dy + data->m12 * (x + qreal(0.5));
3494 bool affine = !data->m13 && !data->m23;
3495
3496 const qreal inv2pi = M_1_PI / 2.0;
3497
3498 const BlendType *end = buffer + length;
3499 if (affine) {
3500 rx -= data->gradient.conical.center.x;
3501 ry -= data->gradient.conical.center.y;
3502 while (buffer < end) {
3503 qreal angle = qAtan2(y: ry, x: rx) + data->gradient.conical.angle;
3504
3505 *buffer = GradientBase::fetchSingle(data->gradient, 1 - angle * inv2pi);
3506
3507 rx += data->m11;
3508 ry += data->m12;
3509 ++buffer;
3510 }
3511 } else {
3512 qreal rw = data->m23 * (y + qreal(0.5))
3513 + data->m33 + data->m13 * (x + qreal(0.5));
3514 if (!rw)
3515 rw = 1;
3516 while (buffer < end) {
3517 qreal angle = qAtan2(y: ry/rw - data->gradient.conical.center.x,
3518 x: rx/rw - data->gradient.conical.center.y)
3519 + data->gradient.conical.angle;
3520
3521 *buffer = GradientBase::fetchSingle(data->gradient, 1 - angle * inv2pi);
3522
3523 rx += data->m11;
3524 ry += data->m12;
3525 rw += data->m13;
3526 if (!rw) {
3527 rw += data->m13;
3528 }
3529 ++buffer;
3530 }
3531 }
3532 return b;
3533}
3534
3535static const uint * QT_FASTCALL qt_fetch_conical_gradient(uint *buffer, const Operator *, const QSpanData *data,
3536 int y, int x, int length)
3537{
3538 return qt_fetch_conical_gradient_template<GradientBase32, uint>(buffer, data, y, x, length);
3539}
3540
3541#if QT_CONFIG(raster_64bit)
3542static const QRgba64 * QT_FASTCALL qt_fetch_conical_gradient_rgb64(QRgba64 *buffer, const Operator *, const QSpanData *data,
3543 int y, int x, int length)
3544{
3545 return qt_fetch_conical_gradient_template<GradientBase64, QRgba64>(buffer, data, y, x, length);
3546}
3547#endif
3548
3549#if QT_CONFIG(raster_fp)
3550static const QRgbaFloat32 * QT_FASTCALL qt_fetch_conical_gradient_rgbfp(QRgbaFloat32 *buffer, const Operator *, const QSpanData *data,
3551 int y, int x, int length)
3552{
3553 return qt_fetch_conical_gradient_template<GradientBaseFP, QRgbaFloat32>(buffer, data, y, x, length);
3554}
3555#endif
3556
// Tables of solid-color composition functions, defined in another
// translation unit. One table per pixel representation (32-bit, 64-bit, FP).
extern CompositionFunctionSolid qt_functionForModeSolid_C[];
extern CompositionFunctionSolid64 qt_functionForModeSolid64_C[];
extern CompositionFunctionSolidFP qt_functionForModeSolidFP_C[];

// Tables currently in use for solid-color composition. getOperator() indexes
// them with the composition mode to fill in Operator::funcSolid*.
static const CompositionFunctionSolid *functionForModeSolid = qt_functionForModeSolid_C;
#if QT_CONFIG(raster_64bit)
static const CompositionFunctionSolid64 *functionForModeSolid64 = qt_functionForModeSolid64_C;
#endif
#if QT_CONFIG(raster_fp)
static const CompositionFunctionSolidFP *functionForModeSolidFP = qt_functionForModeSolidFP_C;
#endif

// Tables of source-buffer composition functions, defined in another
// translation unit.
extern CompositionFunction qt_functionForMode_C[];
extern CompositionFunction64 qt_functionForMode64_C[];
extern CompositionFunctionFP qt_functionForModeFP_C[];

// Tables currently in use for source-buffer composition. getOperator()
// indexes them with the composition mode to fill in Operator::func*.
static const CompositionFunction *functionForMode = qt_functionForMode_C;
#if QT_CONFIG(raster_64bit)
static const CompositionFunction64 *functionForMode64 = qt_functionForMode64_C;
#endif
#if QT_CONFIG(raster_fp)
static const CompositionFunctionFP *functionForModeFP = qt_functionForModeFP_C;
#endif
3580
3581static TextureBlendType getBlendType(const QSpanData *data)
3582{
3583 TextureBlendType ft;
3584 if (data->texture.type == QTextureData::Pattern)
3585 ft = BlendTiled;
3586 else if (data->txop <= QTransform::TxTranslate)
3587 if (data->texture.type == QTextureData::Tiled)
3588 ft = BlendTiled;
3589 else
3590 ft = BlendUntransformed;
3591 else if (data->bilinear)
3592 if (data->texture.type == QTextureData::Tiled)
3593 ft = BlendTransformedBilinearTiled;
3594 else
3595 ft = BlendTransformedBilinear;
3596 else
3597 if (data->texture.type == QTextureData::Tiled)
3598 ft = BlendTransformedTiled;
3599 else
3600 ft = BlendTransformed;
3601 return ft;
3602}
3603
3604static inline Operator getOperator(const QSpanData *data, const QT_FT_Span *spans, int spanCount)
3605{
3606 Operator op;
3607 bool solidSource = false;
3608
3609 switch(data->type) {
3610 case QSpanData::Solid:
3611 solidSource = data->solidColor.alphaF() >= 1.0f;
3612 op.srcFetch = nullptr;
3613 op.srcFetch64 = nullptr;
3614 op.srcFetchFP = nullptr;
3615 break;
3616 case QSpanData::LinearGradient:
3617 solidSource = !data->gradient.alphaColor;
3618 getLinearGradientValues(v: &op.linear, data);
3619 op.srcFetch = qt_fetch_linear_gradient;
3620#if QT_CONFIG(raster_64bit)
3621 op.srcFetch64 = qt_fetch_linear_gradient_rgb64;
3622#endif
3623#if QT_CONFIG(raster_fp)
3624 op.srcFetchFP = qt_fetch_linear_gradient_rgbfp;
3625#endif
3626 break;
3627 case QSpanData::RadialGradient:
3628 solidSource = !data->gradient.alphaColor;
3629 getRadialGradientValues(v: &op.radial, data);
3630 op.srcFetch = qt_fetch_radial_gradient;
3631#if QT_CONFIG(raster_64bit)
3632 op.srcFetch64 = qt_fetch_radial_gradient_rgb64;
3633#endif
3634#if QT_CONFIG(raster_fp)
3635 op.srcFetchFP = qt_fetch_radial_gradient_rgbfp;
3636#endif
3637 break;
3638 case QSpanData::ConicalGradient:
3639 solidSource = !data->gradient.alphaColor;
3640 op.srcFetch = qt_fetch_conical_gradient;
3641#if QT_CONFIG(raster_64bit)
3642 op.srcFetch64 = qt_fetch_conical_gradient_rgb64;
3643#endif
3644#if QT_CONFIG(raster_fp)
3645 op.srcFetchFP = qt_fetch_conical_gradient_rgbfp;
3646#endif
3647 break;
3648 case QSpanData::Texture:
3649 solidSource = !data->texture.hasAlpha;
3650 op.srcFetch = getSourceFetch(blendType: getBlendType(data), format: data->texture.format);
3651#if QT_CONFIG(raster_64bit)
3652 op.srcFetch64 = getSourceFetch64(blendType: getBlendType(data), format: data->texture.format);;
3653#endif
3654#if QT_CONFIG(raster_fp)
3655 op.srcFetchFP = getSourceFetchFP(blendType: getBlendType(data), data->texture.format);
3656#endif
3657 break;
3658 default:
3659 Q_UNREACHABLE();
3660 break;
3661 }
3662#if !QT_CONFIG(raster_64bit)
3663 op.srcFetch64 = nullptr;
3664#endif
3665#if !QT_CONFIG(raster_fp)
3666 op.srcFetchFP = nullptr;
3667#endif
3668
3669 op.mode = data->rasterBuffer->compositionMode;
3670 if (op.mode == QPainter::CompositionMode_SourceOver && solidSource)
3671 op.mode = QPainter::CompositionMode_Source;
3672
3673 op.destFetch = destFetchProc[data->rasterBuffer->format];
3674#if QT_CONFIG(raster_64bit)
3675 op.destFetch64 = destFetchProc64[data->rasterBuffer->format];
3676#else
3677 op.destFetch64 = nullptr;
3678#endif
3679#if QT_CONFIG(raster_fp)
3680 op.destFetchFP = destFetchProcFP[data->rasterBuffer->format];
3681#else
3682 op.destFetchFP = nullptr;
3683#endif
3684 if (op.mode == QPainter::CompositionMode_Source &&
3685 (data->type != QSpanData::Texture || data->texture.const_alpha == 256)) {
3686 const QT_FT_Span *lastSpan = spans + spanCount;
3687 bool alphaSpans = false;
3688 while (spans < lastSpan) {
3689 if (spans->coverage != 255) {
3690 alphaSpans = true;
3691 break;
3692 }
3693 ++spans;
3694 }
3695 if (!alphaSpans && spanCount > 0) {
3696 // If all spans are opaque we do not need to fetch dest.
3697 // But don't clear passthrough destFetch as they are just as fast and save destStore.
3698 if (op.destFetch != destFetchARGB32P)
3699 op.destFetch = destFetchUndefined;
3700#if QT_CONFIG(raster_64bit)
3701 if (op.destFetch64 != destFetchRGB64)
3702 op.destFetch64 = destFetch64Undefined;
3703#endif
3704#if QT_CONFIG(raster_fp)
3705 if (op.destFetchFP != destFetchRGBFP)
3706 op.destFetchFP = destFetchFPUndefined;
3707#endif
3708 }
3709 }
3710
3711 op.destStore = destStoreProc[data->rasterBuffer->format];
3712 op.funcSolid = functionForModeSolid[op.mode];
3713 op.func = functionForMode[op.mode];
3714#if QT_CONFIG(raster_64bit)
3715 op.destStore64 = destStoreProc64[data->rasterBuffer->format];
3716 op.funcSolid64 = functionForModeSolid64[op.mode];
3717 op.func64 = functionForMode64[op.mode];
3718#else
3719 op.destStore64 = nullptr;
3720 op.funcSolid64 = nullptr;
3721 op.func64 = nullptr;
3722#endif
3723#if QT_CONFIG(raster_fp)
3724 op.destStoreFP = destStoreFP;
3725 op.funcSolidFP = functionForModeSolidFP[op.mode];
3726 op.funcFP = functionForModeFP[op.mode];
3727#else
3728 op.destStoreFP = nullptr;
3729 op.funcSolidFP = nullptr;
3730 op.funcFP = nullptr;
3731#endif
3732
3733 return op;
3734}
3735
3736static void spanfill_from_first(QRasterBuffer *rasterBuffer, QPixelLayout::BPP bpp, int x, int y, int length)
3737{
3738 switch (bpp) {
3739 case QPixelLayout::BPP32FPx4: {
3740 QRgbaFloat32 *dest = reinterpret_cast<QRgbaFloat32 *>(rasterBuffer->scanLine(y)) + x;
3741 qt_memfill_template(dest: dest + 1, color: dest[0], count: length - 1);
3742 break;
3743 }
3744 case QPixelLayout::BPP16FPx4:
3745 case QPixelLayout::BPP64: {
3746 quint64 *dest = reinterpret_cast<quint64 *>(rasterBuffer->scanLine(y)) + x;
3747 qt_memfill_template(dest: dest + 1, color: dest[0], count: length - 1);
3748 break;
3749 }
3750 case QPixelLayout::BPP32: {
3751 quint32 *dest = reinterpret_cast<quint32 *>(rasterBuffer->scanLine(y)) + x;
3752 qt_memfill_template(dest: dest + 1, color: dest[0], count: length - 1);
3753 break;
3754 }
3755 case QPixelLayout::BPP24: {
3756 quint24 *dest = reinterpret_cast<quint24 *>(rasterBuffer->scanLine(y)) + x;
3757 qt_memfill_template(dest: dest + 1, color: dest[0], count: length - 1);
3758 break;
3759 }
3760 case QPixelLayout::BPP16: {
3761 quint16 *dest = reinterpret_cast<quint16 *>(rasterBuffer->scanLine(y)) + x;
3762 qt_memfill_template(dest: dest + 1, color: dest[0], count: length - 1);
3763 break;
3764 }
3765 case QPixelLayout::BPP8: {
3766 uchar *dest = rasterBuffer->scanLine(y) + x;
3767 memset(s: dest + 1, c: dest[0], n: length - 1);
3768 break;
3769 }
3770 default:
3771 Q_UNREACHABLE();
3772 }
3773}
3774
3775
3776// -------------------- blend methods ---------------------
3777
// QT_THREAD_PARALLEL_FILLS(function)
//
// Runs function(start, end) over the span index range [0, count), where
// 'count' must be a variable in scope at the point of expansion.
//
// With QT_USE_THREAD_PARALLEL_FILLS defined, the range is divided into
// (count + 32) / 64 segments. If that yields more than one segment, a Qt GUI
// thread pool is available and the current thread is not one of the pool's
// threads, each segment is started on the pool and the calling thread blocks
// on a semaphore until every segment has released it. Each segment receives
// (remaining spans) / (remaining segments) spans, so the whole range is
// covered. In all other cases, function(0, count) runs on the calling thread.
//
// The macro declares locals named 'segments' and 'threadPool' and ends in an
// unbraced if/else, so it should be used once per scope as a full statement.
#if defined(QT_USE_THREAD_PARALLEL_FILLS)
#define QT_THREAD_PARALLEL_FILLS(function) \
    const int segments = (count + 32) / 64; \
    QThreadPool *threadPool = QThreadPoolPrivate::qtGuiInstance(); \
    if (segments > 1 && threadPool && !threadPool->contains(QThread::currentThread())) { \
        QSemaphore semaphore; \
        int c = 0; \
        for (int i = 0; i < segments; ++i) { \
            int cn = (count - c) / (segments - i); \
            threadPool->start([&, c, cn]() { \
                function(c, c + cn); \
                semaphore.release(1); \
            }, 1); \
            c += cn; \
        } \
        semaphore.acquire(segments); \
    } else \
        function(0, count)
#else
// Without thread support the whole range is processed on the calling thread.
#define QT_THREAD_PARALLEL_FILLS(function) function(0, count)
#endif
3799
3800static void blend_color_generic(int count, const QT_FT_Span *spans, void *userData)
3801{
3802 QSpanData *data = reinterpret_cast<QSpanData *>(userData);
3803 const Operator op = getOperator(data, spans: nullptr, spanCount: 0);
3804 const uint color = data->solidColor.rgba();
3805 const bool solidFill = op.mode == QPainter::CompositionMode_Source;
3806 const QPixelLayout::BPP bpp = qPixelLayouts[data->rasterBuffer->format].bpp;
3807
3808 auto function = [=] (int cStart, int cEnd) {
3809 alignas(16) uint buffer[BufferSize];
3810 for (int c = cStart; c < cEnd; ++c) {
3811 int x = spans[c].x;
3812 int length = spans[c].len;
3813 if (solidFill && bpp >= QPixelLayout::BPP8 && spans[c].coverage == 255 && length && op.destStore) {
3814 // If dest doesn't matter we don't need to bother with blending or converting all the identical pixels
3815 op.destStore(data->rasterBuffer, x, spans[c].y, &color, 1);
3816 spanfill_from_first(rasterBuffer: data->rasterBuffer, bpp, x, y: spans[c].y, length);
3817 length = 0;
3818 }
3819
3820 while (length) {
3821 int l = qMin(a: BufferSize, b: length);
3822 uint *dest = op.destFetch(buffer, data->rasterBuffer, x, spans[c].y, l);
3823 op.funcSolid(dest, l, color, spans[c].coverage);
3824 if (op.destStore)
3825 op.destStore(data->rasterBuffer, x, spans[c].y, dest, l);
3826 length -= l;
3827 x += l;
3828 }
3829 }
3830 };
3831 QT_THREAD_PARALLEL_FILLS(function);
3832}
3833
3834static void blend_color_argb(int count, const QT_FT_Span *spans, void *userData)
3835{
3836 QSpanData *data = reinterpret_cast<QSpanData *>(userData);
3837
3838 const Operator op = getOperator(data, spans: nullptr, spanCount: 0);
3839 const uint color = data->solidColor.rgba();
3840
3841 if (op.mode == QPainter::CompositionMode_Source) {
3842 // inline for performance
3843 while (count--) {
3844 uint *target = ((uint *)data->rasterBuffer->scanLine(y: spans->y)) + spans->x;
3845 if (spans->coverage == 255) {
3846 qt_memfill(dest: target, color, count: spans->len);
3847#ifdef __SSE2__
3848 } else if (spans->len > 16) {
3849 op.funcSolid(target, spans->len, color, spans->coverage);
3850#endif
3851 } else {
3852 uint c = BYTE_MUL(x: color, a: spans->coverage);
3853 int ialpha = 255 - spans->coverage;
3854 for (int i = 0; i < spans->len; ++i)
3855 target[i] = c + BYTE_MUL(x: target[i], a: ialpha);
3856 }
3857 ++spans;
3858 }
3859 return;
3860 }
3861 const auto funcSolid = op.funcSolid;
3862 auto function = [=] (int cStart, int cEnd) {
3863 for (int c = cStart; c < cEnd; ++c) {
3864 uint *target = ((uint *)data->rasterBuffer->scanLine(y: spans[c].y)) + spans[c].x;
3865 funcSolid(target, spans[c].len, color, spans[c].coverage);
3866 }
3867 };
3868 QT_THREAD_PARALLEL_FILLS(function);
3869}
3870
3871static void blend_color_generic_rgb64(int count, const QT_FT_Span *spans, void *userData)
3872{
3873#if QT_CONFIG(raster_64bit)
3874 QSpanData *data = reinterpret_cast<QSpanData *>(userData);
3875 const Operator op = getOperator(data, spans: nullptr, spanCount: 0);
3876 if (!op.funcSolid64) {
3877 qCDebug(lcQtGuiDrawHelper, "blend_color_generic_rgb64: unsupported 64bit blend attempted, falling back to 32-bit");
3878 return blend_color_generic(count, spans, userData);
3879 }
3880
3881 const QRgba64 color = data->solidColor.rgba64();
3882 const bool solidFill = op.mode == QPainter::CompositionMode_Source;
3883 const QPixelLayout::BPP bpp = qPixelLayouts[data->rasterBuffer->format].bpp;
3884
3885 auto function = [=, &op] (int cStart, int cEnd)
3886 {
3887 alignas(16) QRgba64 buffer[BufferSize];
3888 for (int c = cStart; c < cEnd; ++c) {
3889 int x = spans[c].x;
3890 int length = spans[c].len;
3891 if (solidFill && bpp >= QPixelLayout::BPP8 && spans[c].coverage == 255 && length && op.destStore64) {
3892 // If dest doesn't matter we don't need to bother with blending or converting all the identical pixels
3893 op.destStore64(data->rasterBuffer, x, spans[c].y, &color, 1);
3894 spanfill_from_first(rasterBuffer: data->rasterBuffer, bpp, x, y: spans[c].y, length);
3895 length = 0;
3896 }
3897
3898 while (length) {
3899 int l = qMin(a: BufferSize, b: length);
3900 QRgba64 *dest = op.destFetch64(buffer, data->rasterBuffer, x, spans[c].y, l);
3901 op.funcSolid64(dest, l, color, spans[c].coverage);
3902 if (op.destStore64)
3903 op.destStore64(data->rasterBuffer, x, spans[c].y, dest, l);
3904 length -= l;
3905 x += l;
3906 }
3907 }
3908 };
3909 QT_THREAD_PARALLEL_FILLS(function);
3910#else
3911 blend_color_generic(count, spans, userData);
3912#endif
3913}
3914
3915static void blend_color_generic_fp(int count, const QT_FT_Span *spans, void *userData)
3916{
3917#if QT_CONFIG(raster_fp)
3918 QSpanData *data = reinterpret_cast<QSpanData *>(userData);
3919 const Operator op = getOperator(data, spans: nullptr, spanCount: 0);
3920 if (!op.funcSolidFP || !op.destFetchFP) {
3921 qCDebug(lcQtGuiDrawHelper, "blend_color_generic_fp: unsupported 4xF16 blend attempted, falling back to 32-bit");
3922 return blend_color_generic(count, spans, userData);
3923 }
3924
3925 float r, g, b, a;
3926 data->solidColor.getRgbF(r: &r, g: &g, b: &b, a: &a);
3927 const QRgbaFloat32 color{.r: r, .g: g, .b: b, .a: a};
3928 const bool solidFill = op.mode == QPainter::CompositionMode_Source;
3929 QPixelLayout::BPP bpp = qPixelLayouts[data->rasterBuffer->format].bpp;
3930
3931 auto function = [=, &op] (int cStart, int cEnd)
3932 {
3933 alignas(16) QRgbaFloat32 buffer[BufferSize];
3934 for (int c = cStart; c < cEnd; ++c) {
3935 int x = spans[c].x;
3936 int length = spans[c].len;
3937 if (solidFill && bpp >= QPixelLayout::BPP8 && spans[c].coverage == 255 && length && op.destStoreFP) {
3938 // If dest doesn't matter we don't need to bother with blending or converting all the identical pixels
3939 op.destStoreFP(data->rasterBuffer, x, spans[c].y, &color, 1);
3940 spanfill_from_first(rasterBuffer: data->rasterBuffer, bpp, x, y: spans[c].y, length);
3941 length = 0;
3942 }
3943
3944 while (length) {
3945 int l = qMin(a: BufferSize, b: length);
3946 QRgbaFloat32 *dest = op.destFetchFP(buffer, data->rasterBuffer, x, spans[c].y, l);
3947 op.funcSolidFP(dest, l, color, spans[c].coverage);
3948 if (op.destStoreFP)
3949 op.destStoreFP(data->rasterBuffer, x, spans[c].y, dest, l);
3950 length -= l;
3951 x += l;
3952 }
3953 }
3954 };
3955 QT_THREAD_PARALLEL_FILLS(function);
3956#else
3957 blend_color_generic(count, spans, userData);
3958#endif
3959}
3960
3961template <typename T>
3962void handleSpans(int count, const QT_FT_Span *spans, const QSpanData *data, const Operator &op)
3963{
3964 const int const_alpha = (data->type == QSpanData::Texture) ? data->texture.const_alpha : 256;
3965 const bool solidSource = op.mode == QPainter::CompositionMode_Source && const_alpha == 256;
3966
3967 auto function = [=, &op] (int cStart, int cEnd)
3968 {
3969 T handler(data, op);
3970 int coverage = 0;
3971 for (int c = cStart; c < cEnd;) {
3972 if (!spans[c].len) {
3973 ++c;
3974 continue;
3975 }
3976 int x = spans[c].x;
3977 const int y = spans[c].y;
3978 int right = x + spans[c].len;
3979 const bool fetchDest = !solidSource || spans[c].coverage < 255;
3980
3981 // compute length of adjacent spans
3982 for (int i = c + 1; i < cEnd && spans[i].y == y && spans[i].x == right && fetchDest == (!solidSource || spans[i].coverage < 255); ++i)
3983 right += spans[i].len;
3984 int length = right - x;
3985
3986 while (length) {
3987 int l = qMin(a: BufferSize, b: length);
3988 length -= l;
3989
3990 int process_length = l;
3991 int process_x = x;
3992
3993 const auto *src = handler.fetch(process_x, y, process_length, fetchDest);
3994 int offset = 0;
3995 while (l > 0) {
3996 if (x == spans[c].x) // new span?
3997 coverage = (spans[c].coverage * const_alpha) >> 8;
3998
3999 int right = spans[c].x + spans[c].len;
4000 int len = qMin(a: l, b: right - x);
4001
4002 handler.process(x, y, len, coverage, src, offset);
4003
4004 l -= len;
4005 x += len;
4006 offset += len;
4007
4008 if (x == right) // done with current span?
4009 ++c;
4010 }
4011 handler.store(process_x, y, process_length);
4012 }
4013 }
4014 };
4015 QT_THREAD_PARALLEL_FILLS(function);
4016}
4017
// State shared by the BlendSrcGeneric* span handlers: the span data being
// drawn and the Operator describing how to fetch, blend and store.
// Note that 'op' is held by reference, so the Operator must outlive the
// handler.
struct QBlendBase
{
    const QSpanData *data;
    const Operator &op;
};
4023
4024class BlendSrcGeneric : public QBlendBase
4025{
4026public:
4027 uint *dest = nullptr;
4028 alignas(16) uint buffer[BufferSize];
4029 alignas(16) uint src_buffer[BufferSize];
4030 BlendSrcGeneric(const QSpanData *d, const Operator &o)
4031 : QBlendBase{.data: d, .op: o}
4032 {
4033 }
4034
4035 const uint *fetch(int x, int y, int len, bool fetchDest)
4036 {
4037 if (fetchDest || op.destFetch == destFetchARGB32P)
4038 dest = op.destFetch(buffer, data->rasterBuffer, x, y, len);
4039 else
4040 dest = buffer;
4041 return op.srcFetch(src_buffer, &op, data, y, x, len);
4042 }
4043
4044 void process(int, int, int len, int coverage, const uint *src, int offset)
4045 {
4046 op.func(dest + offset, src + offset, len, coverage);
4047 }
4048
4049 void store(int x, int y, int len)
4050 {
4051 if (op.destStore)
4052 op.destStore(data->rasterBuffer, x, y, dest, len);
4053 }
4054};
4055
4056#if QT_CONFIG(raster_64bit)
4057class BlendSrcGenericRGB64 : public QBlendBase
4058{
4059public:
4060 QRgba64 *dest = nullptr;
4061 alignas(16) QRgba64 buffer[BufferSize];
4062 alignas(16) QRgba64 src_buffer[BufferSize];
4063 BlendSrcGenericRGB64(const QSpanData *d, const Operator &o)
4064 : QBlendBase{.data: d, .op: o}
4065 {
4066 }
4067
4068 bool isSupported() const
4069 {
4070 return op.func64 && op.destFetch64;
4071 }
4072
4073 const QRgba64 *fetch(int x, int y, int len, bool fetchDest)
4074 {
4075 if (fetchDest || op.destFetch64 == destFetchRGB64)
4076 dest = op.destFetch64(buffer, data->rasterBuffer, x, y, len);
4077 else
4078 dest = buffer;
4079 return op.srcFetch64(src_buffer, &op, data, y, x, len);
4080 }
4081
4082 void process(int, int, int len, int coverage, const QRgba64 *src, int offset)
4083 {
4084 op.func64(dest + offset, src + offset, len, coverage);
4085 }
4086
4087 void store(int x, int y, int len)
4088 {
4089 if (op.destStore64)
4090 op.destStore64(data->rasterBuffer, x, y, dest, len);
4091 }
4092};
4093#endif
4094
4095#if QT_CONFIG(raster_fp)
4096class BlendSrcGenericRGBFP : public QBlendBase
4097{
4098public:
4099 QRgbaFloat32 *dest = nullptr;
4100 alignas(16) QRgbaFloat32 buffer[BufferSize];
4101 alignas(16) QRgbaFloat32 src_buffer[BufferSize];
4102 BlendSrcGenericRGBFP(const QSpanData *d, const Operator &o)
4103 : QBlendBase{.data: d, .op: o}
4104 {
4105 }
4106
4107 bool isSupported() const
4108 {
4109 return op.funcFP && op.destFetchFP && op.srcFetchFP;
4110 }
4111
4112 const QRgbaFloat32 *fetch(int x, int y, int len, bool fetchDest)
4113 {
4114 if (fetchDest || op.destFetchFP == destFetchRGBFP)
4115 dest = op.destFetchFP(buffer, data->rasterBuffer, x, y, len);
4116 else
4117 dest = buffer;
4118 return op.srcFetchFP(src_buffer, &op, data, y, x, len);
4119 }
4120
4121 void process(int, int, int len, int coverage, const QRgbaFloat32 *src, int offset)
4122 {
4123 op.funcFP(dest + offset, src + offset, len, coverage);
4124 }
4125
4126 void store(int x, int y, int len)
4127 {
4128 if (op.destStoreFP)
4129 op.destStoreFP(data->rasterBuffer, x, y, dest, len);
4130 }
4131};
4132#endif
4133
4134static void blend_src_generic(int count, const QT_FT_Span *spans, void *userData)
4135{
4136 QSpanData *data = reinterpret_cast<QSpanData *>(userData);
4137 const Operator op = getOperator(data, spans: nullptr, spanCount: 0);
4138 handleSpans<BlendSrcGeneric>(count, spans, data, op);
4139}
4140
4141#if QT_CONFIG(raster_64bit)
4142static void blend_src_generic_rgb64(int count, const QT_FT_Span *spans, void *userData)
4143{
4144 QSpanData *data = reinterpret_cast<QSpanData *>(userData);
4145 const Operator op = getOperator(data, spans: nullptr, spanCount: 0);
4146 if (op.func64 && op.destFetch64) {
4147 handleSpans<BlendSrcGenericRGB64>(count, spans, data, op);
4148 } else {
4149 qCDebug(lcQtGuiDrawHelper, "blend_src_generic_rgb64: unsupported 64-bit blend attempted, falling back to 32-bit");
4150 handleSpans<BlendSrcGeneric>(count, spans, data, op);
4151 }
4152}
4153#endif
4154
4155#if QT_CONFIG(raster_fp)
4156static void blend_src_generic_fp(int count, const QT_FT_Span *spans, void *userData)
4157{
4158 QSpanData *data = reinterpret_cast<QSpanData *>(userData);
4159 const Operator op = getOperator(data, spans, spanCount: count);
4160 if (op.funcFP && op.destFetchFP && op.srcFetchFP) {
4161 handleSpans<BlendSrcGenericRGBFP>(count, spans, data, op);
4162 } else {
4163 qCDebug(lcQtGuiDrawHelper, "blend_src_generic_fp: unsupported 4xFP blend attempted, falling back to 32-bit");
4164 handleSpans<BlendSrcGeneric>(count, spans, data, op);
4165 }
4166}
4167#endif
4168
4169static void blend_untransformed_generic(int count, const QT_FT_Span *spans, void *userData)
4170{
4171 QSpanData *data = reinterpret_cast<QSpanData *>(userData);
4172
4173 const Operator op = getOperator(data, spans, spanCount: count);
4174
4175 const int image_width = data->texture.width;
4176 const int image_height = data->texture.height;
4177 const int const_alpha = data->texture.const_alpha;
4178 const int xoff = -qRound(d: -data->dx);
4179 const int yoff = -qRound(d: -data->dy);
4180 const bool solidSource = op.mode == QPainter::CompositionMode_Source && const_alpha == 256 && op.destFetch != destFetchARGB32P;
4181
4182 auto function = [=, &op] (int cStart, int cEnd)
4183 {
4184 alignas(16) uint buffer[BufferSize];
4185 alignas(16) uint src_buffer[BufferSize];
4186 for (int c = cStart; c < cEnd; ++c) {
4187 if (!spans[c].len)
4188 continue;
4189 int x = spans[c].x;
4190 int length = spans[c].len;
4191 int sx = xoff + x;
4192 int sy = yoff + spans[c].y;
4193 const bool fetchDest = !solidSource || spans[c].coverage < 255;
4194 if (sy >= 0 && sy < image_height && sx < image_width) {
4195 if (sx < 0) {
4196 x -= sx;
4197 length += sx;
4198 sx = 0;
4199 }
4200 if (sx + length > image_width)
4201 length = image_width - sx;
4202 if (length > 0) {
4203 const int coverage = (spans[c].coverage * const_alpha) >> 8;
4204 while (length) {
4205 int l = qMin(a: BufferSize, b: length);
4206 const uint *src = op.srcFetch(src_buffer, &op, data, sy, sx, l);
4207 uint *dest = fetchDest ? op.destFetch(buffer, data->rasterBuffer, x, spans[c].y, l) : buffer;
4208 op.func(dest, src, l, coverage);
4209 if (op.destStore)
4210 op.destStore(data->rasterBuffer, x, spans[c].y, dest, l);
4211 x += l;
4212 sx += l;
4213 length -= l;
4214 }
4215 }
4216 }
4217 }
4218 };
4219 QT_THREAD_PARALLEL_FILLS(function);
4220}
4221
4222#if QT_CONFIG(raster_64bit)
4223static void blend_untransformed_generic_rgb64(int count, const QT_FT_Span *spans, void *userData)
4224{
4225 QSpanData *data = reinterpret_cast<QSpanData *>(userData);
4226
4227 const Operator op = getOperator(data, spans, spanCount: count);
4228 if (!op.func64) {
4229 qCDebug(lcQtGuiDrawHelper, "blend_untransformed_generic_rgb64: unsupported 64-bit blend attempted, falling back to 32-bit");
4230 return blend_untransformed_generic(count, spans, userData);
4231 }
4232
4233 const int image_width = data->texture.width;
4234 const int image_height = data->texture.height;
4235 const int const_alpha = data->texture.const_alpha;
4236 const int xoff = -qRound(d: -data->dx);
4237 const int yoff = -qRound(d: -data->dy);
4238 const bool solidSource = op.mode == QPainter::CompositionMode_Source && const_alpha == 256 && op.destFetch64 != destFetchRGB64;
4239
4240 auto function = [=, &op] (int cStart, int cEnd)
4241 {
4242 alignas(16) QRgba64 buffer[BufferSize];
4243 alignas(16) QRgba64 src_buffer[BufferSize];
4244 for (int c = cStart; c < cEnd; ++c) {
4245 if (!spans[c].len)
4246 continue;
4247 int x = spans[c].x;
4248 int length = spans[c].len;
4249 int sx = xoff + x;
4250 int sy = yoff + spans[c].y;
4251 const bool fetchDest = !solidSource || spans[c].coverage < 255;
4252 if (sy >= 0 && sy < image_height && sx < image_width) {
4253 if (sx < 0) {
4254 x -= sx;
4255 length += sx;
4256 sx = 0;
4257 }
4258 if (sx + length > image_width)
4259 length = image_width - sx;
4260 if (length > 0) {
4261 const int coverage = (spans[c].coverage * const_alpha) >> 8;
4262 while (length) {
4263 int l = qMin(a: BufferSize, b: length);
4264 const QRgba64 *src = op.srcFetch64(src_buffer, &op, data, sy, sx, l);
4265 QRgba64 *dest = fetchDest ? op.destFetch64(buffer, data->rasterBuffer, x, spans[c].y, l) : buffer;
4266 op.func64(dest, src, l, coverage);
4267 if (op.destStore64)
4268 op.destStore64(data->rasterBuffer, x, spans[c].y, dest, l);
4269 x += l;
4270 sx += l;
4271 length -= l;
4272 }
4273 }
4274 }
4275 }
4276 };
4277 QT_THREAD_PARALLEL_FILLS(function);
4278}
4279#endif
4280
4281#if QT_CONFIG(raster_fp)
4282static void blend_untransformed_generic_fp(int count, const QT_FT_Span *spans, void *userData)
4283{
4284 QSpanData *data = reinterpret_cast<QSpanData *>(userData);
4285
4286 const Operator op = getOperator(data, spans, spanCount: count);
4287 if (!op.funcFP) {
4288 qCDebug(lcQtGuiDrawHelper, "blend_untransformed_generic_rgbaf16: unsupported 4xFP16 blend attempted, falling back to 32-bit");
4289 return blend_untransformed_generic(count, spans, userData);
4290 }
4291
4292 const int image_width = data->texture.width;
4293 const int image_height = data->texture.height;
4294 const int xoff = -qRound(d: -data->dx);
4295 const int yoff = -qRound(d: -data->dy);
4296 const bool solidSource = op.mode == QPainter::CompositionMode_Source && data->texture.const_alpha == 256 && op.destFetchFP != destFetchRGBFP;
4297
4298 auto function = [=, &op] (int cStart, int cEnd)
4299 {
4300 alignas(16) QRgbaFloat32 buffer[BufferSize];
4301 alignas(16) QRgbaFloat32 src_buffer[BufferSize];
4302 for (int c = cStart; c < cEnd; ++c) {
4303 if (!spans[c].len)
4304 continue;
4305 int x = spans[c].x;
4306 int length = spans[c].len;
4307 int sx = xoff + x;
4308 int sy = yoff + spans[c].y;
4309 const bool fetchDest = !solidSource || spans[c].coverage < 255;
4310 if (sy >= 0 && sy < image_height && sx < image_width) {
4311 if (sx < 0) {
4312 x -= sx;
4313 length += sx;
4314 sx = 0;
4315 }
4316 if (sx + length > image_width)
4317 length = image_width - sx;
4318 if (length > 0) {
4319 const int coverage = (spans[c].coverage * data->texture.const_alpha) >> 8;
4320 while (length) {
4321 int l = qMin(a: BufferSize, b: length);
4322 const QRgbaFloat32 *src = op.srcFetchFP(src_buffer, &op, data, sy, sx, l);
4323 QRgbaFloat32 *dest = fetchDest ? op.destFetchFP(buffer, data->rasterBuffer, x, spans[c].y, l) : buffer;
4324 op.funcFP(dest, src, l, coverage);
4325 if (op.destStoreFP)
4326 op.destStoreFP(data->rasterBuffer, x, spans[c].y, dest, l);
4327 x += l;
4328 sx += l;
4329 length -= l;
4330 }
4331 }
4332 }
4333 }
4334 };
4335 QT_THREAD_PARALLEL_FILLS(function);
4336}
4337#endif
4338
4339static void blend_untransformed_argb(int count, const QT_FT_Span *spans, void *userData)
4340{
4341 QSpanData *data = reinterpret_cast<QSpanData *>(userData);
4342 if (data->texture.format != QImage::Format_ARGB32_Premultiplied
4343 && data->texture.format != QImage::Format_RGB32) {
4344 blend_untransformed_generic(count, spans, userData);
4345 return;
4346 }
4347
4348 const Operator op = getOperator(data, spans, spanCount: count);
4349
4350 const int image_width = data->texture.width;
4351 const int image_height = data->texture.height;
4352 const int const_alpha = data->texture.const_alpha;
4353 const int xoff = -qRound(d: -data->dx);
4354 const int yoff = -qRound(d: -data->dy);
4355
4356 auto function = [=, &op] (int cStart, int cEnd)
4357 {
4358 for (int c = cStart; c < cEnd; ++c) {
4359 if (!spans[c].len)
4360 continue;
4361 int x = spans[c].x;
4362 int length = spans[c].len;
4363 int sx = xoff + x;
4364 int sy = yoff + spans[c].y;
4365 if (sy >= 0 && sy < image_height && sx < image_width) {
4366 if (sx < 0) {
4367 x -= sx;
4368 length += sx;
4369 sx = 0;
4370 }
4371 if (sx + length > image_width)
4372 length = image_width - sx;
4373 if (length > 0) {
4374 const int coverage = (spans[c].coverage * const_alpha) >> 8;
4375 const uint *src = (const uint *)data->texture.scanLine(y: sy) + sx;
4376 uint *dest = ((uint *)data->rasterBuffer->scanLine(y: spans[c].y)) + x;
4377 op.func(dest, src, length, coverage);
4378 }
4379 }
4380 }
4381 };
4382 QT_THREAD_PARALLEL_FILLS(function);
4383}
4384
4385static inline quint16 interpolate_pixel_rgb16_255(quint16 x, quint8 a,
4386 quint16 y, quint8 b)
4387{
4388 quint16 t = ((((x & 0x07e0) * a) + ((y & 0x07e0) * b)) >> 5) & 0x07e0;
4389 t |= ((((x & 0xf81f) * a) + ((y & 0xf81f) * b)) >> 5) & 0xf81f;
4390
4391 return t;
4392}
4393
4394static inline quint32 interpolate_pixel_rgb16x2_255(quint32 x, quint8 a,
4395 quint32 y, quint8 b)
4396{
4397 uint t;
4398 t = ((((x & 0xf81f07e0) >> 5) * a) + (((y & 0xf81f07e0) >> 5) * b)) & 0xf81f07e0;
4399 t |= ((((x & 0x07e0f81f) * a) + ((y & 0x07e0f81f) * b)) >> 5) & 0x07e0f81f;
4400 return t;
4401}
4402
4403static inline void blend_sourceOver_rgb16_rgb16(quint16 *Q_DECL_RESTRICT dest,
4404 const quint16 *Q_DECL_RESTRICT src,
4405 int length,
4406 const quint8 alpha,
4407 const quint8 ialpha)
4408{
4409 const int dstAlign = ((quintptr)dest) & 0x3;
4410 if (dstAlign) {
4411 *dest = interpolate_pixel_rgb16_255(x: *src, a: alpha, y: *dest, b: ialpha);
4412 ++dest;
4413 ++src;
4414 --length;
4415 }
4416 const int srcAlign = ((quintptr)src) & 0x3;
4417 int length32 = length >> 1;
4418 if (length32 && srcAlign == 0) {
4419 while (length32--) {
4420 const quint32 *src32 = reinterpret_cast<const quint32*>(src);
4421 quint32 *dest32 = reinterpret_cast<quint32*>(dest);
4422 *dest32 = interpolate_pixel_rgb16x2_255(x: *src32, a: alpha,
4423 y: *dest32, b: ialpha);
4424 dest += 2;
4425 src += 2;
4426 }
4427 length &= 0x1;
4428 }
4429 while (length--) {
4430 *dest = interpolate_pixel_rgb16_255(x: *src, a: alpha, y: *dest, b: ialpha);
4431 ++dest;
4432 ++src;
4433 }
4434}
4435
4436static void blend_untransformed_rgb565(int count, const QT_FT_Span *spans, void *userData)
4437{
4438 QSpanData *data = reinterpret_cast<QSpanData*>(userData);
4439 QPainter::CompositionMode mode = data->rasterBuffer->compositionMode;
4440
4441 if (data->texture.format != QImage::Format_RGB16
4442 || (mode != QPainter::CompositionMode_SourceOver
4443 && mode != QPainter::CompositionMode_Source))
4444 {
4445 blend_untransformed_generic(count, spans, userData);
4446 return;
4447 }
4448
4449 const int image_width = data->texture.width;
4450 const int image_height = data->texture.height;
4451 int xoff = -qRound(d: -data->dx);
4452 int yoff = -qRound(d: -data->dy);
4453
4454 auto function = [=](int cStart, int cEnd)
4455 {
4456 for (int c = cStart; c < cEnd; ++c) {
4457 if (!spans[c].len)
4458 continue;
4459 const quint8 coverage = (data->texture.const_alpha * spans[c].coverage) >> 8;
4460 if (coverage == 0)
4461 continue;
4462
4463 int x = spans[c].x;
4464 int length = spans[c].len;
4465 int sx = xoff + x;
4466 int sy = yoff + spans[c].y;
4467 if (sy >= 0 && sy < image_height && sx < image_width) {
4468 if (sx < 0) {
4469 x -= sx;
4470 length += sx;
4471 sx = 0;
4472 }
4473 if (sx + length > image_width)
4474 length = image_width - sx;
4475 if (length > 0) {
4476 quint16 *dest = (quint16 *)data->rasterBuffer->scanLine(y: spans[c].y) + x;
4477 const quint16 *src = (const quint16 *)data->texture.scanLine(y: sy) + sx;
4478 if (coverage == 255) {
4479 memcpy(dest: dest, src: src, n: length * sizeof(quint16));
4480 } else {
4481 const quint8 alpha = (coverage + 1) >> 3;
4482 const quint8 ialpha = 0x20 - alpha;
4483 if (alpha > 0)
4484 blend_sourceOver_rgb16_rgb16(dest, src, length, alpha, ialpha);
4485 }
4486 }
4487 }
4488 }
4489 };
4490 QT_THREAD_PARALLEL_FILLS(function);
4491}
4492
4493static void blend_tiled_generic(int count, const QT_FT_Span *spans, void *userData)
4494{
4495 QSpanData *data = reinterpret_cast<QSpanData *>(userData);
4496
4497 const Operator op = getOperator(data, spans, spanCount: count);
4498
4499 const int image_width = data->texture.width;
4500 const int image_height = data->texture.height;
4501 const int const_alpha = data->texture.const_alpha;
4502 int xoff = -qRound(d: -data->dx) % image_width;
4503 int yoff = -qRound(d: -data->dy) % image_height;
4504
4505 if (xoff < 0)
4506 xoff += image_width;
4507 if (yoff < 0)
4508 yoff += image_height;
4509
4510 auto function = [=, &op](int cStart, int cEnd)
4511 {
4512 alignas(16) uint buffer[BufferSize];
4513 alignas(16) uint src_buffer[BufferSize];
4514 for (int c = cStart; c < cEnd; ++c) {
4515 int x = spans[c].x;
4516 int length = spans[c].len;
4517 int sx = (xoff + spans[c].x) % image_width;
4518 int sy = (spans[c].y + yoff) % image_height;
4519 if (sx < 0)
4520 sx += image_width;
4521 if (sy < 0)
4522 sy += image_height;
4523
4524 const int coverage = (spans[c].coverage * const_alpha) >> 8;
4525 while (length) {
4526 int l = qMin(a: image_width - sx, b: length);
4527 if (BufferSize < l)
4528 l = BufferSize;
4529 const uint *src = op.srcFetch(src_buffer, &op, data, sy, sx, l);
4530 uint *dest = op.destFetch(buffer, data->rasterBuffer, x, spans[c].y, l);
4531 op.func(dest, src, l, coverage);
4532 if (op.destStore)
4533 op.destStore(data->rasterBuffer, x, spans[c].y, dest, l);
4534 x += l;
4535 sx += l;
4536 length -= l;
4537 if (sx >= image_width)
4538 sx = 0;
4539 }
4540 }
4541 };
4542 QT_THREAD_PARALLEL_FILLS(function);
4543}
4544
4545#if QT_CONFIG(raster_64bit)
4546static void blend_tiled_generic_rgb64(int count, const QT_FT_Span *spans, void *userData)
4547{
4548 QSpanData *data = reinterpret_cast<QSpanData *>(userData);
4549
4550 const Operator op = getOperator(data, spans, spanCount: count);
4551 if (!op.func64) {
4552 qCDebug(lcQtGuiDrawHelper, "blend_tiled_generic_rgb64: unsupported 64-bit blend attempted, falling back to 32-bit");
4553 return blend_tiled_generic(count, spans, userData);
4554 }
4555
4556 const int image_width = data->texture.width;
4557 const int image_height = data->texture.height;
4558 int xoff = -qRound(d: -data->dx) % image_width;
4559 int yoff = -qRound(d: -data->dy) % image_height;
4560
4561 if (xoff < 0)
4562 xoff += image_width;
4563 if (yoff < 0)
4564 yoff += image_height;
4565
4566 bool isBpp32 = qPixelLayouts[data->rasterBuffer->format].bpp == QPixelLayout::BPP32;
4567 bool isBpp64 = qPixelLayouts[data->rasterBuffer->format].bpp == QPixelLayout::BPP64;
4568 if (op.destFetch64 == destFetch64Undefined && image_width <= BufferSize && (isBpp32 || isBpp64)) {
4569 alignas(16) QRgba64 src_buffer[BufferSize];
4570 // If destination isn't blended into the result, we can do the tiling directly on destination pixels.
4571 while (count--) {
4572 int x = spans->x;
4573 int y = spans->y;
4574 int length = spans->len;
4575 int sx = (xoff + spans->x) % image_width;
4576 int sy = (spans->y + yoff) % image_height;
4577 if (sx < 0)
4578 sx += image_width;
4579 if (sy < 0)
4580 sy += image_height;
4581
4582 int sl = qMin(a: image_width, b: length);
4583 if (sx > 0 && sl > 0) {
4584 int l = qMin(a: image_width - sx, b: sl);
4585 const QRgba64 *src = op.srcFetch64(src_buffer, &op, data, sy, sx, l);
4586 op.destStore64(data->rasterBuffer, x, y, src, l);
4587 x += l;
4588 sx += l;
4589 sl -= l;
4590 if (sx >= image_width)
4591 sx = 0;
4592 }
4593 if (sl > 0) {
4594 Q_ASSERT(sx == 0);
4595 const QRgba64 *src = op.srcFetch64(src_buffer, &op, data, sy, sx, sl);
4596 op.destStore64(data->rasterBuffer, x, y, src, sl);
4597 x += sl;
4598 sx += sl;
4599 sl -= sl;
4600 if (sx >= image_width)
4601 sx = 0;
4602 }
4603 if (isBpp32) {
4604 uint *dest = reinterpret_cast<uint *>(data->rasterBuffer->scanLine(y)) + x - image_width;
4605 for (int i = image_width; i < length; ++i)
4606 dest[i] = dest[i - image_width];
4607 } else {
4608 quint64 *dest = reinterpret_cast<quint64 *>(data->rasterBuffer->scanLine(y)) + x - image_width;
4609 for (int i = image_width; i < length; ++i)
4610 dest[i] = dest[i - image_width];
4611 }
4612 ++spans;
4613 }
4614 return;
4615 }
4616
4617 auto function = [=, &op](int cStart, int cEnd)
4618 {
4619 alignas(16) QRgba64 buffer[BufferSize];
4620 alignas(16) QRgba64 src_buffer[BufferSize];
4621 for (int c = cStart; c < cEnd; ++c) {
4622 int x = spans[c].x;
4623 int length = spans[c].len;
4624 int sx = (xoff + spans[c].x) % image_width;
4625 int sy = (spans[c].y + yoff) % image_height;
4626 if (sx < 0)
4627 sx += image_width;
4628 if (sy < 0)
4629 sy += image_height;
4630
4631 const int coverage = (spans[c].coverage * data->texture.const_alpha) >> 8;
4632 while (length) {
4633 int l = qMin(a: image_width - sx, b: length);
4634 if (BufferSize < l)
4635 l = BufferSize;
4636 const QRgba64 *src = op.srcFetch64(src_buffer, &op, data, sy, sx, l);
4637 QRgba64 *dest = op.destFetch64(buffer, data->rasterBuffer, x, spans[c].y, l);
4638 op.func64(dest, src, l, coverage);
4639 if (op.destStore64)
4640 op.destStore64(data->rasterBuffer, x, spans[c].y, dest, l);
4641 x += l;
4642 sx += l;
4643 length -= l;
4644 if (sx >= image_width)
4645 sx = 0;
4646 }
4647 }
4648 };
4649 QT_THREAD_PARALLEL_FILLS(function);
4650}
4651#endif
4652
4653#if QT_CONFIG(raster_fp)
4654static void blend_tiled_generic_fp(int count, const QT_FT_Span *spans, void *userData)
4655{
4656 QSpanData *data = reinterpret_cast<QSpanData *>(userData);
4657
4658 const Operator op = getOperator(data, spans, spanCount: count);
4659 if (!op.funcFP) {
4660 qCDebug(lcQtGuiDrawHelper, "blend_tiled_generic_fp: unsupported 4xFP blend attempted, falling back to 32-bit");
4661 return blend_tiled_generic(count, spans, userData);
4662 }
4663
4664 const int image_width = data->texture.width;
4665 const int image_height = data->texture.height;
4666 int xoff = -qRound(d: -data->dx) % image_width;
4667 int yoff = -qRound(d: -data->dy) % image_height;
4668
4669 if (xoff < 0)
4670 xoff += image_width;
4671 if (yoff < 0)
4672 yoff += image_height;
4673
4674 // Consider tiling optimizing like the other versions.
4675
4676 auto function = [=, &op](int cStart, int cEnd)
4677 {
4678 alignas(16) QRgbaFloat32 buffer[BufferSize];
4679 alignas(16) QRgbaFloat32 src_buffer[BufferSize];
4680 for (int c = cStart; c < cEnd; ++c) {
4681 int x = spans[c].x;
4682 int length = spans[c].len;
4683 int sx = (xoff + spans[c].x) % image_width;
4684 int sy = (spans[c].y + yoff) % image_height;
4685 if (sx < 0)
4686 sx += image_width;
4687 if (sy < 0)
4688 sy += image_height;
4689
4690 const int coverage = (spans[c].coverage * data->texture.const_alpha) >> 8;
4691 while (length) {
4692 int l = qMin(a: image_width - sx, b: length);
4693 if (BufferSize < l)
4694 l = BufferSize;
4695 const QRgbaFloat32 *src = op.srcFetchFP(src_buffer, &op, data, sy, sx, l);
4696 QRgbaFloat32 *dest = op.destFetchFP(buffer, data->rasterBuffer, x, spans[c].y, l);
4697 op.funcFP(dest, src, l, coverage);
4698 if (op.destStoreFP)
4699 op.destStoreFP(data->rasterBuffer, x, spans[c].y, dest, l);
4700 x += l;
4701 sx += l;
4702 length -= l;
4703 if (sx >= image_width)
4704 sx = 0;
4705 }
4706 }
4707 };
4708 QT_THREAD_PARALLEL_FILLS(function);
4709}
4710#endif
4711
4712static void blend_tiled_argb(int count, const QT_FT_Span *spans, void *userData)
4713{
4714 QSpanData *data = reinterpret_cast<QSpanData *>(userData);
4715 if (data->texture.format != QImage::Format_ARGB32_Premultiplied
4716 && data->texture.format != QImage::Format_RGB32) {
4717 blend_tiled_generic(count, spans, userData);
4718 return;
4719 }
4720
4721 const Operator op = getOperator(data, spans, spanCount: count);
4722
4723 const int image_width = data->texture.width;
4724 const int image_height = data->texture.height;
4725 int xoff = -qRound(d: -data->dx) % image_width;
4726 int yoff = -qRound(d: -data->dy) % image_height;
4727
4728 if (xoff < 0)
4729 xoff += image_width;
4730 if (yoff < 0)
4731 yoff += image_height;
4732 const auto func = op.func;
4733 const int const_alpha = data->texture.const_alpha;
4734
4735 auto function = [=] (int cStart, int cEnd) {
4736 for (int c = cStart; c < cEnd; ++c) {
4737 int x = spans[c].x;
4738 int length = spans[c].len;
4739 int sx = (xoff + spans[c].x) % image_width;
4740 int sy = (spans[c].y + yoff) % image_height;
4741 if (sx < 0)
4742 sx += image_width;
4743 if (sy < 0)
4744 sy += image_height;
4745
4746 const int coverage = (spans[c].coverage * const_alpha) >> 8;
4747 while (length) {
4748 int l = qMin(a: image_width - sx, b: length);
4749 if (BufferSize < l)
4750 l = BufferSize;
4751 const uint *src = (const uint *)data->texture.scanLine(y: sy) + sx;
4752 uint *dest = ((uint *)data->rasterBuffer->scanLine(y: spans[c].y)) + x;
4753 func(dest, src, l, coverage);
4754 x += l;
4755 sx += l;
4756 length -= l;
4757 if (sx >= image_width)
4758 sx = 0;
4759 }
4760 }
4761 };
4762 QT_THREAD_PARALLEL_FILLS(function);
4763}
4764
4765static void blend_tiled_rgb565(int count, const QT_FT_Span *spans, void *userData)
4766{
4767 QSpanData *data = reinterpret_cast<QSpanData*>(userData);
4768 QPainter::CompositionMode mode = data->rasterBuffer->compositionMode;
4769
4770 if (data->texture.format != QImage::Format_RGB16
4771 || (mode != QPainter::CompositionMode_SourceOver
4772 && mode != QPainter::CompositionMode_Source))
4773 {
4774 blend_tiled_generic(count, spans, userData);
4775 return;
4776 }
4777
4778 const int image_width = data->texture.width;
4779 const int image_height = data->texture.height;
4780 int xoff = -qRound(d: -data->dx) % image_width;
4781 int yoff = -qRound(d: -data->dy) % image_height;
4782
4783 if (xoff < 0)
4784 xoff += image_width;
4785 if (yoff < 0)
4786 yoff += image_height;
4787
4788 const int const_alpha = data->texture.const_alpha;
4789 auto function = [=] (int cStart, int cEnd) {
4790 for (int c = cStart; c < cEnd; ++c) {
4791 const quint8 coverage = (const_alpha * spans[c].coverage) >> 8;
4792 if (coverage == 0)
4793 continue;
4794
4795 int x = spans[c].x;
4796 int length = spans[c].len;
4797 int sx = (xoff + spans[c].x) % image_width;
4798 int sy = (spans[c].y + yoff) % image_height;
4799 if (sx < 0)
4800 sx += image_width;
4801 if (sy < 0)
4802 sy += image_height;
4803
4804 if (coverage == 255) {
4805 // Copy the first texture block
4806 length = qMin(a: image_width,b: length);
4807 int tx = x;
4808 while (length) {
4809 int l = qMin(a: image_width - sx, b: length);
4810 if (BufferSize < l)
4811 l = BufferSize;
4812 quint16 *dest = ((quint16 *)data->rasterBuffer->scanLine(y: spans[c].y)) + tx;
4813 const quint16 *src = (const quint16 *)data->texture.scanLine(y: sy) + sx;
4814 memcpy(dest: dest, src: src, n: l * sizeof(quint16));
4815 length -= l;
4816 tx += l;
4817 sx += l;
4818 if (sx >= image_width)
4819 sx = 0;
4820 }
4821
4822 // Now use the rasterBuffer as the source of the texture,
4823 // We can now progressively copy larger blocks
4824 // - Less cpu time in code figuring out what to copy
4825 // We are dealing with one block of data
4826 // - More likely to fit in the cache
4827 // - can use memcpy
4828 int copy_image_width = qMin(a: image_width, b: int(spans[c].len));
4829 length = spans[c].len - copy_image_width;
4830 quint16 *src = ((quint16 *)data->rasterBuffer->scanLine(y: spans[c].y)) + x;
4831 quint16 *dest = src + copy_image_width;
4832 while (copy_image_width < length) {
4833 memcpy(dest: dest, src: src, n: copy_image_width * sizeof(quint16));
4834 dest += copy_image_width;
4835 length -= copy_image_width;
4836 copy_image_width *= 2;
4837 }
4838 if (length > 0)
4839 memcpy(dest: dest, src: src, n: length * sizeof(quint16));
4840 } else {
4841 const quint8 alpha = (coverage + 1) >> 3;
4842 const quint8 ialpha = 0x20 - alpha;
4843 if (alpha > 0) {
4844 while (length) {
4845 int l = qMin(a: image_width - sx, b: length);
4846 if (BufferSize < l)
4847 l = BufferSize;
4848 quint16 *dest = ((quint16 *)data->rasterBuffer->scanLine(y: spans[c].y)) + x;
4849 const quint16 *src = (const quint16 *)data->texture.scanLine(y: sy) + sx;
4850 blend_sourceOver_rgb16_rgb16(dest, src, length: l, alpha, ialpha);
4851 x += l;
4852 sx += l;
4853 length -= l;
4854 if (sx >= image_width)
4855 sx = 0;
4856 }
4857 }
4858 }
4859 }
4860 };
4861 QT_THREAD_PARALLEL_FILLS(function);
4862}
4863
/*
    Span-processing tables used by qBlendTexture().

    Image formats here are target formats. Every table has NBlendTypes
    entries and is indexed with the TextureBlendType of the span data; the
    trailing comment on each entry names the blend type it serves. Only the
    untransformed and tiled cases have format-specific fast paths, all
    transformed cases share the generic source blender of the table's pixel
    depth.
*/
// Destination is ARGB32_Premultiplied.
static const ProcessSpans processTextureSpansARGB32PM[NBlendTypes] = {
    blend_untransformed_argb,           // Untransformed
    blend_tiled_argb,                   // Tiled
    blend_src_generic,                  // Transformed
    blend_src_generic,                  // TransformedTiled
    blend_src_generic,                  // TransformedBilinear
    blend_src_generic                   // TransformedBilinearTiled
};

// Destination is RGB16.
static const ProcessSpans processTextureSpansRGB16[NBlendTypes] = {
    blend_untransformed_rgb565,         // Untransformed
    blend_tiled_rgb565,                 // Tiled
    blend_src_generic,                  // Transformed
    blend_src_generic,                  // TransformedTiled
    blend_src_generic,                  // TransformedBilinear
    blend_src_generic                   // TransformedBilinearTiled
};

// Any destination without a dedicated table, blended through 32-bit buffers.
static const ProcessSpans processTextureSpansGeneric[NBlendTypes] = {
    blend_untransformed_generic,        // Untransformed
    blend_tiled_generic,                // Tiled
    blend_src_generic,                  // Transformed
    blend_src_generic,                  // TransformedTiled
    blend_src_generic,                  // TransformedBilinear
    blend_src_generic                   // TransformedBilinearTiled
};

#if QT_CONFIG(raster_64bit)
// Destinations blended through QRgba64 buffers.
static const ProcessSpans processTextureSpansGeneric64[NBlendTypes] = {
    blend_untransformed_generic_rgb64,  // Untransformed
    blend_tiled_generic_rgb64,          // Tiled
    blend_src_generic_rgb64,            // Transformed
    blend_src_generic_rgb64,            // TransformedTiled
    blend_src_generic_rgb64,            // TransformedBilinear
    blend_src_generic_rgb64             // TransformedBilinearTiled
};
#endif

#if QT_CONFIG(raster_fp)
// Destinations blended through floating-point buffers.
static const ProcessSpans processTextureSpansGenericFP[NBlendTypes] = {
    blend_untransformed_generic_fp,     // Untransformed
    blend_tiled_generic_fp,             // Tiled
    blend_src_generic_fp,               // Transformed
    blend_src_generic_fp,               // TransformedTiled
    blend_src_generic_fp,               // TransformedBilinear
    blend_src_generic_fp                // TransformedBilinearTiled
};
#endif
/*
    Entry point for blending a texture over a set of spans.

    Picks the span-processing table that matches the destination format of
    the raster buffer, indexes it with the blend type of \a userData (a
    QSpanData) and forwards the call.

    The case labels below rely on fall-through across preprocessor blocks:
    a group of labels whose body is compiled out continues into whatever
    body follows it.
*/
void qBlendTexture(int count, const QT_FT_Span *spans, void *userData)
{
    QSpanData *data = reinterpret_cast<QSpanData *>(userData);
    TextureBlendType blendType = getBlendType(data);
    ProcessSpans proc;
    switch (data->rasterBuffer->format) {
    case QImage::Format_Invalid:
        Q_UNREACHABLE_RETURN();
    case QImage::Format_ARGB32_Premultiplied:
        proc = processTextureSpansARGB32PM[blendType];
        break;
    case QImage::Format_RGB16:
        proc = processTextureSpansRGB16[blendType];
        break;
    // ARGB32 and RGBA8888 only join the group below on the platforms named in the condition.
#if defined(__SSE2__) || defined(__ARM_NEON__) || (Q_PROCESSOR_WORDSIZE == 8)
    case QImage::Format_ARGB32:
    case QImage::Format_RGBA8888:
#endif
    case QImage::Format_BGR30:
    case QImage::Format_A2BGR30_Premultiplied:
    case QImage::Format_RGB30:
    case QImage::Format_A2RGB30_Premultiplied:
    case QImage::Format_RGBX64:
    case QImage::Format_RGBA64:
    case QImage::Format_RGBA64_Premultiplied:
    case QImage::Format_Grayscale16:
    // Without the raster_fp feature the floating-point formats share this group.
#if !QT_CONFIG(raster_fp)
    case QImage::Format_RGBX16FPx4:
    case QImage::Format_RGBA16FPx4:
    case QImage::Format_RGBA16FPx4_Premultiplied:
    case QImage::Format_RGBX32FPx4:
    case QImage::Format_RGBA32FPx4:
    case QImage::Format_RGBA32FPx4_Premultiplied:
#endif
    // Without raster_64bit this body is absent and the labels above fall through.
#if QT_CONFIG(raster_64bit)
        proc = processTextureSpansGeneric64[blendType];
        break;
#endif // QT_CONFIG(raster_64bit)
#if QT_CONFIG(raster_fp)
    case QImage::Format_RGBX16FPx4:
    case QImage::Format_RGBA16FPx4:
    case QImage::Format_RGBA16FPx4_Premultiplied:
    case QImage::Format_RGBX32FPx4:
    case QImage::Format_RGBA32FPx4:
    case QImage::Format_RGBA32FPx4_Premultiplied:
        proc = processTextureSpansGenericFP[blendType];
        break;
#endif
    default:
        proc = processTextureSpansGeneric[blendType];
        break;
    }
    proc(count, spans, userData);
}
4967
4968static void blend_vertical_gradient_argb(int count, const QT_FT_Span *spans, void *userData)
4969{
4970 QSpanData *data = reinterpret_cast<QSpanData *>(userData);
4971
4972 LinearGradientValues linear;
4973 getLinearGradientValues(v: &linear, data);
4974
4975 CompositionFunctionSolid funcSolid =
4976 functionForModeSolid[data->rasterBuffer->compositionMode];
4977
4978 /*
4979 The logic for vertical gradient calculations is a mathematically
4980 reduced copy of that in fetchLinearGradient() - which is basically:
4981
4982 qreal ry = data->m22 * (y + 0.5) + data->dy;
4983 qreal t = linear.dy*ry + linear.off;
4984 t *= (GRADIENT_STOPTABLE_SIZE - 1);
4985 quint32 color =
4986 qt_gradient_pixel_fixed(&data->gradient,
4987 int(t * FIXPT_SIZE));
4988
4989 This has then been converted to fixed point to improve performance.
4990 */
4991 const int gss = GRADIENT_STOPTABLE_SIZE - 1;
4992 int yinc = int((linear.dy * data->m22 * gss) * FIXPT_SIZE);
4993 int off = int((((linear.dy * (data->m22 * qreal(0.5) + data->dy) + linear.off) * gss) * FIXPT_SIZE));
4994
4995 while (count--) {
4996 int y = spans->y;
4997 int x = spans->x;
4998
4999 quint32 *dst = (quint32 *)(data->rasterBuffer->scanLine(y)) + x;
5000 quint32 color =
5001 qt_gradient_pixel_fixed(data: &data->gradient, fixed_pos: yinc * y + off);
5002
5003 funcSolid(dst, spans->len, color, spans->coverage);
5004 ++spans;
5005 }
5006}
5007
5008template<ProcessSpans blend_color>
5009static void blend_vertical_gradient(int count, const QT_FT_Span *spans, void *userData)
5010{
5011 QSpanData *data = reinterpret_cast<QSpanData *>(userData);
5012
5013 LinearGradientValues linear;
5014 getLinearGradientValues(v: &linear, data);
5015
5016 // Based on the same logic as blend_vertical_gradient_argb.
5017
5018 const int gss = GRADIENT_STOPTABLE_SIZE - 1;
5019 int yinc = int((linear.dy * data->m22 * gss) * FIXPT_SIZE);
5020 int off = int((((linear.dy * (data->m22 * qreal(0.5) + data->dy) + linear.off) * gss) * FIXPT_SIZE));
5021
5022 while (count--) {
5023 int y = spans->y;
5024
5025#if QT_CONFIG(raster_64bit)
5026 data->solidColor = qt_gradient_pixel64_fixed(data: &data->gradient, fixed_pos: yinc * y + off);
5027#else
5028 data->solidColor = qt_gradient_pixel_fixed(&data->gradient, yinc * y + off);
5029#endif
5030 blend_color(1, spans, userData);
5031 ++spans;
5032 }
5033}
5034
/*
    Entry point for blending a gradient over a set of spans.

    Dispatches on the destination format of the raster buffer. For each
    format group a vertical linear gradient takes a dedicated per-scanline
    path, everything else goes through the generic source blender of the
    matching pixel depth.

    As in qBlendTexture(), groups of case labels whose body is compiled out
    fall through into the next compiled body.
*/
void qBlendGradient(int count, const QT_FT_Span *spans, void *userData)
{
    QSpanData *data = reinterpret_cast<QSpanData *>(userData);
    // Vertical here means: a linear gradient whose start and end share the
    // same x, painted with at most a scaling transform.
    bool isVerticalGradient =
        data->txop <= QTransform::TxScale &&
        data->type == QSpanData::LinearGradient &&
        data->gradient.linear.end.x == data->gradient.linear.origin.x;
    switch (data->rasterBuffer->format) {
    case QImage::Format_Invalid:
        // Leaves the switch and reaches Q_UNREACHABLE() below.
        break;
    case QImage::Format_RGB32:
    case QImage::Format_ARGB32_Premultiplied:
        if (isVerticalGradient)
            return blend_vertical_gradient_argb(count, spans, userData);
        return blend_src_generic(count, spans, userData);
    // ARGB32 and RGBA8888 only join the group below on the platforms named in the condition.
#if defined(__SSE2__) || defined(__ARM_NEON__) || (Q_PROCESSOR_WORDSIZE == 8)
    case QImage::Format_ARGB32:
    case QImage::Format_RGBA8888:
#endif
    case QImage::Format_BGR30:
    case QImage::Format_A2BGR30_Premultiplied:
    case QImage::Format_RGB30:
    case QImage::Format_A2RGB30_Premultiplied:
    case QImage::Format_RGBX64:
    case QImage::Format_RGBA64:
    case QImage::Format_RGBA64_Premultiplied:
    // Without the raster_fp feature the floating-point formats share this group.
#if !QT_CONFIG(raster_fp)
    case QImage::Format_RGBX16FPx4:
    case QImage::Format_RGBA16FPx4:
    case QImage::Format_RGBA16FPx4_Premultiplied:
    case QImage::Format_RGBX32FPx4:
    case QImage::Format_RGBA32FPx4:
    case QImage::Format_RGBA32FPx4_Premultiplied:
#endif
    // Without raster_64bit this body is absent and the labels above fall through.
#if QT_CONFIG(raster_64bit)
        if (isVerticalGradient)
            return blend_vertical_gradient<blend_color_generic_rgb64>(count, spans, userData);
        return blend_src_generic_rgb64(count, spans, userData);
#endif // QT_CONFIG(raster_64bit)
#if QT_CONFIG(raster_fp)
    case QImage::Format_RGBX16FPx4:
    case QImage::Format_RGBA16FPx4:
    case QImage::Format_RGBA16FPx4_Premultiplied:
    case QImage::Format_RGBX32FPx4:
    case QImage::Format_RGBA32FPx4:
    case QImage::Format_RGBA32FPx4_Premultiplied:
        if (isVerticalGradient)
            return blend_vertical_gradient<blend_color_generic_fp>(count, spans, userData);
        return blend_src_generic_fp(count, spans, userData);
#endif
    default:
        if (isVerticalGradient)
            return blend_vertical_gradient<blend_color_generic>(count, spans, userData);
        return blend_src_generic(count, spans, userData);
    }
    Q_UNREACHABLE();
}
5092
/*
    Paints a 1-bit-per-pixel bitmap in a single colour.

    \a map holds \a mapHeight rows that are \a mapStride bytes apart; bits
    are read most significant first and every set bit writes \a color to the
    corresponding destination pixel, starting at (\a x, \a y). Consecutive
    set bits are collected into runs and written with one qt_memfill() call.

    DST is the destination pixel type.
*/
template <class DST> static
inline void qt_bitmapblit_template(QRasterBuffer *rasterBuffer,
                                   int x, int y, DST color,
                                   const uchar *map,
                                   int mapWidth, int mapHeight, int mapStride)
{
    DST *dest = reinterpret_cast<DST *>(rasterBuffer->scanLine(y)) + x;
    // NOTE(review): presumably the destination stride counted in DST units - confirm against QRasterBuffer.
    const int destStride = rasterBuffer->stride<DST>();

    if (mapWidth > 8) {
        // General case: several bytes per row.
        while (--mapHeight >= 0) {
            int x0 = 0; // destination offset at which the pending run starts
            int n = 0;  // length of the pending run of set bits
            for (int x = 0; x < mapWidth; x += 8) {
                uchar s = map[x >> 3];
                for (int i = 0; i < 8; ++i) {
                    if (s & 0x80) {
                        // Set bit: extend the run; it may continue into the next byte.
                        ++n;
                    } else {
                        if (n) {
                            // Clear bit ends a run: write it, then skip the run and this bit.
                            qt_memfill(dest + x0, color, n);
                            x0 += n + 1;
                            n = 0;
                        } else {
                            ++x0;
                        }
                        if (!s) {
                            // No set bits left in this byte: skip its remaining positions.
                            x0 += 8 - 1 - i;
                            break;
                        }
                    }
                    s <<= 1;
                }
            }
            // Write a run that reaches the end of the row.
            if (n)
                qt_memfill(dest + x0, color, n);
            dest += destStride;
            map += mapStride;
        }
    } else {
        // Narrow case: each row fits in one byte, so only the first byte of a row is read.
        while (--mapHeight >= 0) {
            int x0 = 0;
            int n = 0;
            // The loop ends as soon as no set bits remain in s.
            for (uchar s = *map; s; s <<= 1) {
                if (s & 0x80) {
                    ++n;
                } else if (n) {
                    qt_memfill(dest + x0, color, n);
                    x0 += n + 1;
                    n = 0;
                } else {
                    ++x0;
                }
            }
            if (n)
                qt_memfill(dest + x0, color, n);
            dest += destStride;
            map += mapStride;
        }
    }
}
5154
5155inline static void qt_bitmapblit_argb32(QRasterBuffer *rasterBuffer,
5156 int x, int y, const QRgba64 &color,
5157 const uchar *map,
5158 int mapWidth, int mapHeight, int mapStride)
5159{
5160 qt_bitmapblit_template<quint32>(rasterBuffer, x, y, color: color.toArgb32(),
5161 map, mapWidth, mapHeight, mapStride);
5162}
5163
5164inline static void qt_bitmapblit_rgba8888(QRasterBuffer *rasterBuffer,
5165 int x, int y, const QRgba64 &color,
5166 const uchar *map,
5167 int mapWidth, int mapHeight, int mapStride)
5168{
5169 qt_bitmapblit_template<quint32>(rasterBuffer, x, y, color: ARGB2RGBA(x: color.toArgb32()),
5170 map, mapWidth, mapHeight, mapStride);
5171}
5172
5173template<QtPixelOrder PixelOrder>
5174inline static void qt_bitmapblit_rgb30(QRasterBuffer *rasterBuffer,
5175 int x, int y, const QRgba64 &color,
5176 const uchar *map,
5177 int mapWidth, int mapHeight, int mapStride)
5178{
5179 qt_bitmapblit_template<quint32>(rasterBuffer, x, y, qConvertRgb64ToRgb30<PixelOrder>(color),
5180 map, mapWidth, mapHeight, mapStride);
5181}
5182
5183inline static void qt_bitmapblit_quint16(QRasterBuffer *rasterBuffer,
5184 int x, int y, const QRgba64 &color,
5185 const uchar *map,
5186 int mapWidth, int mapHeight, int mapStride)
5187{
5188 qt_bitmapblit_template<quint16>(rasterBuffer, x, y, color: color.toRgb16(),
5189 map, mapWidth, mapHeight, mapStride);
5190}
5191
5192static inline void grayBlendPixel(quint32 *dst, int coverage, QRgba64 srcLinear, const QColorTrcLut *colorProfile)
5193{
5194 // Do a gammacorrected gray alphablend...
5195 const QRgba64 dstLinear = colorProfile ? colorProfile->toLinear64(rgb32: *dst) : QRgba64::fromArgb32(rgb: *dst);
5196
5197 QRgba64 blend = interpolate255(x: srcLinear, alpha1: coverage, y: dstLinear, alpha2: 255 - coverage);
5198
5199 *dst = colorProfile ? colorProfile->fromLinear64(rgb64: blend) : toArgb32(rgba64: blend);
5200}
5201
5202static inline void alphamapblend_argb32(quint32 *dst, int coverage, QRgba64 srcLinear, quint32 src, const QColorTrcLut *colorProfile)
5203{
5204 if (coverage == 0) {
5205 // nothing
5206 } else if (coverage == 255 || !colorProfile) {
5207 blend_pixel(dst&: *dst, src, const_alpha: coverage);
5208 } else if (*dst < 0xff000000) {
5209 // Give up and do a naive gray alphablend. Needed to deal with ARGB32 and invalid ARGB32_premultiplied, see QTBUG-60571
5210 blend_pixel(dst&: *dst, src, const_alpha: coverage);
5211 } else if (src >= 0xff000000) {
5212 grayBlendPixel(dst, coverage, srcLinear, colorProfile);
5213 } else {
5214 // First do naive blend with text-color
5215 QRgb s = *dst;
5216 blend_pixel(dst&: s, src);
5217 // Then gamma-corrected blend with glyph shape
5218 QRgba64 s64 = colorProfile ? colorProfile->toLinear64(rgb32: s) : QRgba64::fromArgb32(rgb: s);
5219 grayBlendPixel(dst, coverage, srcLinear: s64, colorProfile);
5220 }
5221}
5222
5223#if QT_CONFIG(raster_64bit)
5224
5225static inline void grayBlendPixel(QRgba64 &dst, int coverage, QRgba64 srcLinear, const QColorTrcLut *colorProfile)
5226{
5227 // Do a gammacorrected gray alphablend...
5228 QRgba64 dstColor = dst;
5229 if (colorProfile) {
5230 if (dstColor.isOpaque())
5231 dstColor = colorProfile->toLinear(rgb64: dstColor);
5232 else if (!dstColor.isTransparent())
5233 dstColor = colorProfile->toLinear(rgb64: dstColor.unpremultiplied()).premultiplied();
5234 }
5235
5236 blend_pixel(dst&: dstColor, src: srcLinear, const_alpha: coverage);
5237
5238 if (colorProfile) {
5239 if (dstColor.isOpaque())
5240 dstColor = colorProfile->fromLinear(rgb64: dstColor);
5241 else if (!dstColor.isTransparent())
5242 dstColor = colorProfile->fromLinear(rgb64: dstColor.unpremultiplied()).premultiplied();
5243 }
5244 dst = dstColor;
5245}
5246
5247static inline void alphamapblend_generic(int coverage, QRgba64 *dest, int x, const QRgba64 &srcLinear, const QRgba64 &src, const QColorTrcLut *colorProfile)
5248{
5249 if (coverage == 0) {
5250 // nothing
5251 } else if (coverage == 255) {
5252 blend_pixel(dst&: dest[x], src);
5253 } else if (src.isOpaque()) {
5254 grayBlendPixel(dst&: dest[x], coverage, srcLinear, colorProfile);
5255 } else {
5256 // First do naive blend with text-color
5257 QRgba64 s = dest[x];
5258 blend_pixel(dst&: s, src);
5259 // Then gamma-corrected blend with glyph shape
5260 if (colorProfile)
5261 s = colorProfile->toLinear(rgb64: s);
5262 grayBlendPixel(dst&: dest[x], coverage, srcLinear: s, colorProfile);
5263 }
5264}
5265
5266static void qt_alphamapblit_generic(QRasterBuffer *rasterBuffer,
5267 int x, int y, const QRgba64 &color,
5268 const uchar *map,
5269 int mapWidth, int mapHeight, int mapStride,
5270 const QClipData *clip, bool useGammaCorrection)
5271{
5272 if (color.isTransparent())
5273 return;
5274
5275 const QColorTrcLut *colorProfile = nullptr;
5276
5277 if (useGammaCorrection)
5278 colorProfile = QGuiApplicationPrivate::instance()->colorProfileForA8Text();
5279
5280 QRgba64 srcColor = color;
5281 if (colorProfile && color.isOpaque())
5282 srcColor = colorProfile->toLinear(rgb64: srcColor);
5283
5284 alignas(8) QRgba64 buffer[BufferSize];
5285 const DestFetchProc64 destFetch64 = destFetchProc64[rasterBuffer->format];
5286 const DestStoreProc64 destStore64 = destStoreProc64[rasterBuffer->format];
5287
5288 if (!clip) {
5289 for (int ly = 0; ly < mapHeight; ++ly) {
5290 int i = x;
5291 int length = mapWidth;
5292 while (length > 0) {
5293 int l = qMin(a: BufferSize, b: length);
5294 QRgba64 *dest = destFetch64(buffer, rasterBuffer, i, y + ly, l);
5295 for (int j=0; j < l; ++j) {
5296 const int coverage = map[j + (i - x)];
5297 alphamapblend_generic(coverage, dest, x: j, srcLinear: srcColor, src: color, colorProfile);
5298 }
5299 if (destStore64)
5300 destStore64(rasterBuffer, i, y + ly, dest, l);
5301 length -= l;
5302 i += l;
5303 }
5304 map += mapStride;
5305 }
5306 } else {
5307 int bottom = qMin(a: y + mapHeight, b: rasterBuffer->height());
5308
5309 int top = qMax(a: y, b: 0);
5310 map += (top - y) * mapStride;
5311
5312 const_cast<QClipData *>(clip)->initialize();
5313 for (int yp = top; yp<bottom; ++yp) {
5314 const QClipData::ClipLine &line = clip->m_clipLines[yp];
5315
5316 for (int i=0; i<line.count; ++i) {
5317 const QT_FT_Span &clip = line.spans[i];
5318
5319 int start = qMax<int>(a: x, b: clip.x);
5320 int end = qMin<int>(a: x + mapWidth, b: clip.x + clip.len);
5321 if (end <= start)
5322 continue;
5323 Q_ASSERT(end - start <= BufferSize);
5324 QRgba64 *dest = destFetch64(buffer, rasterBuffer, start, clip.y, end - start);
5325
5326 for (int xp=start; xp<end; ++xp) {
5327 const int coverage = map[xp - x];
5328 alphamapblend_generic(coverage, dest, x: xp - start, srcLinear: srcColor, src: color, colorProfile);
5329 }
5330 if (destStore64)
5331 destStore64(rasterBuffer, start, clip.y, dest, end - start);
5332 } // for (i -> line.count)
5333 map += mapStride;
5334 } // for (yp -> bottom)
5335 }
5336}
5337#else
5338static void qt_alphamapblit_generic(QRasterBuffer *rasterBuffer,
5339 int x, int y, const QRgba64 &color,
5340 const uchar *map,
5341 int mapWidth, int mapHeight, int mapStride,
5342 const QClipData *clip, bool useGammaCorrection)
5343{
5344 if (color.isTransparent())
5345 return;
5346
5347 const quint32 c = color.toArgb32();
5348
5349 const QColorTrcLut *colorProfile = nullptr;
5350
5351 if (useGammaCorrection)
5352 colorProfile = QGuiApplicationPrivate::instance()->colorProfileForA8Text();
5353
5354 QRgba64 srcColor = color;
5355 if (colorProfile && color.isOpaque())
5356 srcColor = colorProfile->toLinear(srcColor);
5357
5358 quint32 buffer[BufferSize];
5359 const DestFetchProc destFetch = destFetchProc[rasterBuffer->format];
5360 const DestStoreProc destStore = destStoreProc[rasterBuffer->format];
5361
5362 if (!clip) {
5363 for (int ly = 0; ly < mapHeight; ++ly) {
5364 int i = x;
5365 int length = mapWidth;
5366 while (length > 0) {
5367 int l = qMin(BufferSize, length);
5368 quint32 *dest = destFetch(buffer, rasterBuffer, i, y + ly, l);
5369 for (int j=0; j < l; ++j) {
5370 const int coverage = map[j + (i - x)];
5371 alphamapblend_argb32(dest + j, coverage, srcColor, c, colorProfile);
5372 }
5373 if (destStore)
5374 destStore(rasterBuffer, i, y + ly, dest, l);
5375 length -= l;
5376 i += l;
5377 }
5378 map += mapStride;
5379 }
5380 } else {
5381 int bottom = qMin(y + mapHeight, rasterBuffer->height());
5382
5383 int top = qMax(y, 0);
5384 map += (top - y) * mapStride;
5385
5386 const_cast<QClipData *>(clip)->initialize();
5387 for (int yp = top; yp<bottom; ++yp) {
5388 const QClipData::ClipLine &line = clip->m_clipLines[yp];
5389
5390 for (int i=0; i<line.count; ++i) {
5391 const QT_FT_Span &clip = line.spans[i];
5392
5393 int start = qMax<int>(x, clip.x);
5394 int end = qMin<int>(x + mapWidth, clip.x + clip.len);
5395 if (end <= start)
5396 continue;
5397 Q_ASSERT(end - start <= BufferSize);
5398 quint32 *dest = destFetch(buffer, rasterBuffer, start, clip.y, end - start);
5399
5400 for (int xp=start; xp<end; ++xp) {
5401 const int coverage = map[xp - x];
5402 alphamapblend_argb32(dest + xp - x, coverage, srcColor, color, colorProfile);
5403 }
5404 if (destStore)
5405 destStore(rasterBuffer, start, clip.y, dest, end - start);
5406 } // for (i -> line.count)
5407 map += mapStride;
5408 } // for (yp -> bottom)
5409 }
5410}
5411#endif
5412
5413static inline void alphamapblend_quint16(int coverage, quint16 *dest, int x, const quint16 srcColor)
5414{
5415 if (coverage == 0) {
5416 // nothing
5417 } else if (coverage == 255) {
5418 dest[x] = srcColor;
5419 } else {
5420 dest[x] = BYTE_MUL_RGB16(x: srcColor, a: coverage)
5421 + BYTE_MUL_RGB16(x: dest[x], a: 255 - coverage);
5422 }
5423}
5424
5425void qt_alphamapblit_quint16(QRasterBuffer *rasterBuffer,
5426 int x, int y, const QRgba64 &color,
5427 const uchar *map,
5428 int mapWidth, int mapHeight, int mapStride,
5429 const QClipData *clip, bool useGammaCorrection)
5430{
5431 if (useGammaCorrection || !color.isOpaque()) {
5432 qt_alphamapblit_generic(rasterBuffer, x, y, color, map, mapWidth, mapHeight, mapStride, clip, useGammaCorrection);
5433 return;
5434 }
5435
5436 const quint16 c = color.toRgb16();
5437
5438 if (!clip) {
5439 quint16 *dest = reinterpret_cast<quint16*>(rasterBuffer->scanLine(y)) + x;
5440 const int destStride = rasterBuffer->stride<quint16>();
5441 while (--mapHeight >= 0) {
5442 for (int i = 0; i < mapWidth; ++i)
5443 alphamapblend_quint16(coverage: map[i], dest, x: i, srcColor: c);
5444 dest += destStride;
5445 map += mapStride;
5446 }
5447 } else {
5448 int top = qMax(a: y, b: 0);
5449 int bottom = qMin(a: y + mapHeight, b: rasterBuffer->height());
5450 map += (top - y) * mapStride;
5451
5452 const_cast<QClipData *>(clip)->initialize();
5453 for (int yp = top; yp<bottom; ++yp) {
5454 const QClipData::ClipLine &line = clip->m_clipLines[yp];
5455
5456 quint16 *dest = reinterpret_cast<quint16*>(rasterBuffer->scanLine(y: yp));
5457
5458 for (int i=0; i<line.count; ++i) {
5459 const QT_FT_Span &clip = line.spans[i];
5460
5461 int start = qMax<int>(a: x, b: clip.x);
5462 int end = qMin<int>(a: x + mapWidth, b: clip.x + clip.len);
5463
5464 for (int xp=start; xp<end; ++xp)
5465 alphamapblend_quint16(coverage: map[xp - x], dest, x: xp, srcColor: c);
5466 } // for (i -> line.count)
5467 map += mapStride;
5468 } // for (yp -> bottom)
5469 }
5470}
5471
5472static void qt_alphamapblit_argb32(QRasterBuffer *rasterBuffer,
5473 int x, int y, const QRgba64 &color,
5474 const uchar *map,
5475 int mapWidth, int mapHeight, int mapStride,
5476 const QClipData *clip, bool useGammaCorrection)
5477{
5478 const quint32 c = color.toArgb32();
5479 const int destStride = rasterBuffer->stride<quint32>();
5480
5481 if (color.isTransparent())
5482 return;
5483
5484 const QColorTrcLut *colorProfile = nullptr;
5485
5486 if (useGammaCorrection)
5487 colorProfile = QGuiApplicationPrivate::instance()->colorProfileForA8Text();
5488
5489 QRgba64 srcColor = color;
5490 if (colorProfile && color.isOpaque())
5491 srcColor = colorProfile->toLinear(rgb64: srcColor);
5492
5493 if (!clip) {
5494 quint32 *dest = reinterpret_cast<quint32*>(rasterBuffer->scanLine(y)) + x;
5495 while (--mapHeight >= 0) {
5496 for (int i = 0; i < mapWidth; ++i) {
5497 const int coverage = map[i];
5498 alphamapblend_argb32(dst: dest + i, coverage, srcLinear: srcColor, src: c, colorProfile);
5499 }
5500 dest += destStride;
5501 map += mapStride;
5502 }
5503 } else {
5504 int bottom = qMin(a: y + mapHeight, b: rasterBuffer->height());
5505
5506 int top = qMax(a: y, b: 0);
5507 map += (top - y) * mapStride;
5508
5509 const_cast<QClipData *>(clip)->initialize();
5510 for (int yp = top; yp<bottom; ++yp) {
5511 const QClipData::ClipLine &line = clip->m_clipLines[yp];
5512
5513 quint32 *dest = reinterpret_cast<quint32 *>(rasterBuffer->scanLine(y: yp));
5514
5515 for (int i=0; i<line.count; ++i) {
5516 const QT_FT_Span &clip = line.spans[i];
5517
5518 int start = qMax<int>(a: x, b: clip.x);
5519 int end = qMin<int>(a: x + mapWidth, b: clip.x + clip.len);
5520
5521 for (int xp=start; xp<end; ++xp) {
5522 const int coverage = map[xp - x];
5523 alphamapblend_argb32(dst: dest + xp, coverage, srcLinear: srcColor, src: c, colorProfile);
5524 } // for (i -> line.count)
5525 } // for (yp -> bottom)
5526 map += mapStride;
5527 }
5528 }
5529}
5530
5531static inline int qRgbAvg(QRgb rgb)
5532{
5533 return (qRed(rgb) * 5 + qGreen(rgb) * 6 + qBlue(rgb) * 5) / 16;
5534}
5535
5536static inline void rgbBlendPixel(quint32 *dst, int coverage, QRgba64 slinear, const QColorTrcLut *colorProfile)
5537{
5538 // Do a gammacorrected RGB alphablend...
5539 const QRgba64 dlinear = colorProfile ? colorProfile->toLinear64(rgb32: *dst) : QRgba64::fromArgb32(rgb: *dst);
5540
5541 QRgba64 blend = rgbBlend(d: dlinear, s: slinear, rgbAlpha: coverage);
5542
5543 *dst = colorProfile ? colorProfile->fromLinear64(rgb64: blend) : toArgb32(rgba64: blend);
5544}
5545
5546static inline QRgb rgbBlend(QRgb d, QRgb s, uint rgbAlpha)
5547{
5548#if defined(__SSE2__)
5549 __m128i vd = _mm_cvtsi32_si128(a: d);
5550 __m128i vs = _mm_cvtsi32_si128(a: s);
5551 __m128i va = _mm_cvtsi32_si128(a: rgbAlpha);
5552 const __m128i vz = _mm_setzero_si128();
5553 vd = _mm_unpacklo_epi8(a: vd, b: vz);
5554 vs = _mm_unpacklo_epi8(a: vs, b: vz);
5555 va = _mm_unpacklo_epi8(a: va, b: vz);
5556 __m128i vb = _mm_xor_si128(a: _mm_set1_epi16(w: 255), b: va);
5557 vs = _mm_mullo_epi16(a: vs, b: va);
5558 vd = _mm_mullo_epi16(a: vd, b: vb);
5559 vd = _mm_add_epi16(a: vd, b: vs);
5560 vd = _mm_add_epi16(a: vd, b: _mm_srli_epi16(a: vd, count: 8));
5561 vd = _mm_add_epi16(a: vd, b: _mm_set1_epi16(w: 0x80));
5562 vd = _mm_srli_epi16(a: vd, count: 8);
5563 vd = _mm_packus_epi16(a: vd, b: vd);
5564 return _mm_cvtsi128_si32(a: vd);
5565#else
5566 const int dr = qRed(d);
5567 const int dg = qGreen(d);
5568 const int db = qBlue(d);
5569
5570 const int sr = qRed(s);
5571 const int sg = qGreen(s);
5572 const int sb = qBlue(s);
5573
5574 const int mr = qRed(rgbAlpha);
5575 const int mg = qGreen(rgbAlpha);
5576 const int mb = qBlue(rgbAlpha);
5577
5578 const int nr = qt_div_255(sr * mr + dr * (255 - mr));
5579 const int ng = qt_div_255(sg * mg + dg * (255 - mg));
5580 const int nb = qt_div_255(sb * mb + db * (255 - mb));
5581
5582 return 0xff000000 | (nr << 16) | (ng << 8) | nb;
5583#endif
5584}
5585
5586static inline void alphargbblend_argb32(quint32 *dst, uint coverage, const QRgba64 &srcLinear, quint32 src, const QColorTrcLut *colorProfile)
5587{
5588 if (coverage == 0xff000000) {
5589 // nothing
5590 } else if (coverage == 0xffffffff && qAlpha(rgb: src) == 255) {
5591 blend_pixel(dst&: *dst, src);
5592 } else if (*dst < 0xff000000) {
5593 // Give up and do a naive gray alphablend. Needed to deal with ARGB32 and invalid ARGB32_premultiplied, see QTBUG-60571
5594 blend_pixel(dst&: *dst, src, const_alpha: qRgbAvg(rgb: coverage));
5595 } else if (!colorProfile) {
5596 // First do naive blend with text-color
5597 QRgb s = *dst;
5598 blend_pixel(dst&: s, src);
5599 // Then a naive blend with glyph shape
5600 *dst = rgbBlend(d: *dst, s, rgbAlpha: coverage);
5601 } else if (srcLinear.isOpaque()) {
5602 rgbBlendPixel(dst, coverage, slinear: srcLinear, colorProfile);
5603 } else {
5604 // First do naive blend with text-color
5605 QRgb s = *dst;
5606 blend_pixel(dst&: s, src);
5607 // Then gamma-corrected blend with glyph shape
5608 QRgba64 s64 = colorProfile ? colorProfile->toLinear64(rgb32: s) : QRgba64::fromArgb32(rgb: s);
5609 rgbBlendPixel(dst, coverage, slinear: s64, colorProfile);
5610 }
5611}
5612
5613#if QT_CONFIG(raster_64bit)
5614static inline void rgbBlendPixel(QRgba64 &dst, int coverage, QRgba64 slinear, const QColorTrcLut *colorProfile)
5615{
5616 // Do a gammacorrected RGB alphablend...
5617 const QRgba64 dlinear = colorProfile ? colorProfile->toLinear(rgb64: dst) : dst;
5618
5619 QRgba64 blend = rgbBlend(d: dlinear, s: slinear, rgbAlpha: coverage);
5620
5621 dst = colorProfile ? colorProfile->fromLinear(rgb64: blend) : blend;
5622}
5623
5624static inline void alphargbblend_generic(uint coverage, QRgba64 *dest, int x, const QRgba64 &srcLinear, const QRgba64 &src, const QColorTrcLut *colorProfile)
5625{
5626 if (coverage == 0xff000000) {
5627 // nothing
5628 } else if (coverage == 0xffffffff) {
5629 blend_pixel(dst&: dest[x], src);
5630 } else if (!dest[x].isOpaque()) {
5631 // Do a gray alphablend.
5632 alphamapblend_generic(coverage: qRgbAvg(rgb: coverage), dest, x, srcLinear, src, colorProfile);
5633 } else if (src.isOpaque()) {
5634 rgbBlendPixel(dst&: dest[x], coverage, slinear: srcLinear, colorProfile);
5635 } else {
5636 // First do naive blend with text-color
5637 QRgba64 s = dest[x];
5638 blend_pixel(dst&: s, src);
5639 // Then gamma-corrected blend with glyph shape
5640 if (colorProfile)
5641 s = colorProfile->toLinear(rgb64: s);
5642 rgbBlendPixel(dst&: dest[x], coverage, slinear: s, colorProfile);
5643 }
5644}
5645
5646static void qt_alphargbblit_generic(QRasterBuffer *rasterBuffer,
5647 int x, int y, const QRgba64 &color,
5648 const uint *src, int mapWidth, int mapHeight, int srcStride,
5649 const QClipData *clip, bool useGammaCorrection)
5650{
5651 if (color.isTransparent())
5652 return;
5653
5654 const QColorTrcLut *colorProfile = nullptr;
5655
5656 if (useGammaCorrection)
5657 colorProfile = QGuiApplicationPrivate::instance()->colorProfileForA32Text();
5658
5659 QRgba64 srcColor = color;
5660 if (colorProfile && color.isOpaque())
5661 srcColor = colorProfile->toLinear(rgb64: srcColor);
5662
5663 alignas(8) QRgba64 buffer[BufferSize];
5664 const DestFetchProc64 destFetch64 = destFetchProc64[rasterBuffer->format];
5665 const DestStoreProc64 destStore64 = destStoreProc64[rasterBuffer->format];
5666
5667 if (!clip) {
5668 for (int ly = 0; ly < mapHeight; ++ly) {
5669 int i = x;
5670 int length = mapWidth;
5671 while (length > 0) {
5672 int l = qMin(a: BufferSize, b: length);
5673 QRgba64 *dest = destFetch64(buffer, rasterBuffer, i, y + ly, l);
5674 for (int j=0; j < l; ++j) {
5675 const uint coverage = src[j + (i - x)];
5676 alphargbblend_generic(coverage, dest, x: j, srcLinear: srcColor, src: color, colorProfile);
5677 }
5678 if (destStore64)
5679 destStore64(rasterBuffer, i, y + ly, dest, l);
5680 length -= l;
5681 i += l;
5682 }
5683 src += srcStride;
5684 }
5685 } else {
5686 int bottom = qMin(a: y + mapHeight, b: rasterBuffer->height());
5687
5688 int top = qMax(a: y, b: 0);
5689 src += (top - y) * srcStride;
5690
5691 const_cast<QClipData *>(clip)->initialize();
5692 for (int yp = top; yp<bottom; ++yp) {
5693 const QClipData::ClipLine &line = clip->m_clipLines[yp];
5694
5695 for (int i=0; i<line.count; ++i) {
5696 const QT_FT_Span &clip = line.spans[i];
5697
5698 int start = qMax<int>(a: x, b: clip.x);
5699 int end = qMin<int>(a: x + mapWidth, b: clip.x + clip.len);
5700 if (end <= start)
5701 continue;
5702 Q_ASSERT(end - start <= BufferSize);
5703 QRgba64 *dest = destFetch64(buffer, rasterBuffer, start, clip.y, end - start);
5704
5705 for (int xp=start; xp<end; ++xp) {
5706 const uint coverage = src[xp - x];
5707 alphargbblend_generic(coverage, dest, x: xp - start, srcLinear: srcColor, src: color, colorProfile);
5708 }
5709 if (destStore64)
5710 destStore64(rasterBuffer, start, clip.y, dest, end - start);
5711 } // for (i -> line.count)
5712 src += srcStride;
5713 } // for (yp -> bottom)
5714 }
5715}
5716#else
5717static void qt_alphargbblit_generic(QRasterBuffer *rasterBuffer,
5718 int x, int y, const QRgba64 &color,
5719 const uint *src, int mapWidth, int mapHeight, int srcStride,
5720 const QClipData *clip, bool useGammaCorrection)
5721{
5722 if (color.isTransparent())
5723 return;
5724
5725 const quint32 c = color.toArgb32();
5726
5727 const QColorTrcLut *colorProfile = nullptr;
5728
5729 if (useGammaCorrection)
5730 colorProfile = QGuiApplicationPrivate::instance()->colorProfileForA32Text();
5731
5732 QRgba64 srcColor = color;
5733 if (colorProfile && color.isOpaque())
5734 srcColor = colorProfile->toLinear(srcColor);
5735
5736 quint32 buffer[BufferSize];
5737 const DestFetchProc destFetch = destFetchProc[rasterBuffer->format];
5738 const DestStoreProc destStore = destStoreProc[rasterBuffer->format];
5739
5740 if (!clip) {
5741 for (int ly = 0; ly < mapHeight; ++ly) {
5742 int i = x;
5743 int length = mapWidth;
5744 while (length > 0) {
5745 int l = qMin(BufferSize, length);
5746 quint32 *dest = destFetch(buffer, rasterBuffer, i, y + ly, l);
5747 for (int j=0; j < l; ++j) {
5748 const uint coverage = src[j + (i - x)];
5749 alphargbblend_argb32(dest + j, coverage, srcColor, c, colorProfile);
5750 }
5751 if (destStore)
5752 destStore(rasterBuffer, i, y + ly, dest, l);
5753 length -= l;
5754 i += l;
5755 }
5756 src += srcStride;
5757 }
5758 } else {
5759 int bottom = qMin(y + mapHeight, rasterBuffer->height());
5760
5761 int top = qMax(y, 0);
5762 src += (top - y) * srcStride;
5763
5764 const_cast<QClipData *>(clip)->initialize();
5765 for (int yp = top; yp<bottom; ++yp) {
5766 const QClipData::ClipLine &line = clip->m_clipLines[yp];
5767
5768 for (int i=0; i<line.count; ++i) {
5769 const QT_FT_Span &clip = line.spans[i];
5770
5771 int start = qMax<int>(x, clip.x);
5772 int end = qMin<int>(x + mapWidth, clip.x + clip.len);
5773 if (end <= start)
5774 continue;
5775 Q_ASSERT(end - start <= BufferSize);
5776 quint32 *dest = destFetch(buffer, rasterBuffer, start, clip.y, end - start);
5777
5778 for (int xp=start; xp<end; ++xp) {
5779 const uint coverage = src[xp - x];
5780 alphargbblend_argb32(dest + xp - start, coverage, srcColor, c, colorProfile);
5781 }
5782 if (destStore)
5783 destStore(rasterBuffer, start, clip.y, dest, end - start);
5784 } // for (i -> line.count)
5785 src += srcStride;
5786 } // for (yp -> bottom)
5787 }
5788}
5789#endif
5790
5791static void qt_alphargbblit_argb32(QRasterBuffer *rasterBuffer,
5792 int x, int y, const QRgba64 &color,
5793 const uint *src, int mapWidth, int mapHeight, int srcStride,
5794 const QClipData *clip, bool useGammaCorrection)
5795{
5796 if (color.isTransparent())
5797 return;
5798
5799 const quint32 c = color.toArgb32();
5800
5801 const QColorTrcLut *colorProfile = nullptr;
5802
5803 if (useGammaCorrection)
5804 colorProfile = QGuiApplicationPrivate::instance()->colorProfileForA32Text();
5805
5806 QRgba64 srcColor = color;
5807 if (colorProfile && color.isOpaque())
5808 srcColor = colorProfile->toLinear(rgb64: srcColor);
5809
5810 if (!clip) {
5811 quint32 *dst = reinterpret_cast<quint32*>(rasterBuffer->scanLine(y)) + x;
5812 const int destStride = rasterBuffer->stride<quint32>();
5813 while (--mapHeight >= 0) {
5814 for (int i = 0; i < mapWidth; ++i) {
5815 const uint coverage = src[i];
5816 alphargbblend_argb32(dst: dst + i, coverage, srcLinear: srcColor, src: c, colorProfile);
5817 }
5818
5819 dst += destStride;
5820 src += srcStride;
5821 }
5822 } else {
5823 int bottom = qMin(a: y + mapHeight, b: rasterBuffer->height());
5824
5825 int top = qMax(a: y, b: 0);
5826 src += (top - y) * srcStride;
5827
5828 const_cast<QClipData *>(clip)->initialize();
5829 for (int yp = top; yp<bottom; ++yp) {
5830 const QClipData::ClipLine &line = clip->m_clipLines[yp];
5831
5832 quint32 *dst = reinterpret_cast<quint32 *>(rasterBuffer->scanLine(y: yp));
5833
5834 for (int i=0; i<line.count; ++i) {
5835 const QT_FT_Span &clip = line.spans[i];
5836
5837 int start = qMax<int>(a: x, b: clip.x);
5838 int end = qMin<int>(a: x + mapWidth, b: clip.x + clip.len);
5839
5840 for (int xp=start; xp<end; ++xp) {
5841 const uint coverage = src[xp - x];
5842 alphargbblend_argb32(dst: dst + xp, coverage, srcLinear: srcColor, src: c, colorProfile);
5843 }
5844 } // for (i -> line.count)
5845 src += srcStride;
5846 } // for (yp -> bottom)
5847
5848 }
5849}
5850
5851static void qt_rectfill_argb32(QRasterBuffer *rasterBuffer,
5852 int x, int y, int width, int height,
5853 const QRgba64 &color)
5854{
5855 qt_rectfill<quint32>(dest: reinterpret_cast<quint32 *>(rasterBuffer->buffer()),
5856 value: color.toArgb32(), x, y, width, height, stride: rasterBuffer->bytesPerLine());
5857}
5858
5859static void qt_rectfill_quint16(QRasterBuffer *rasterBuffer,
5860 int x, int y, int width, int height,
5861 const QRgba64 &color)
5862{
5863 const QPixelLayout &layout = qPixelLayouts[rasterBuffer->format];
5864 quint32 c32 = color.toArgb32();
5865 quint16 c16;
5866 layout.storeFromARGB32PM(reinterpret_cast<uchar *>(&c16), &c32, 0, 1, nullptr, nullptr);
5867 qt_rectfill<quint16>(dest: reinterpret_cast<quint16 *>(rasterBuffer->buffer()),
5868 value: c16, x, y, width, height, stride: rasterBuffer->bytesPerLine());
5869}
5870
5871static void qt_rectfill_quint24(QRasterBuffer *rasterBuffer,
5872 int x, int y, int width, int height,
5873 const QRgba64 &color)
5874{
5875 const QPixelLayout &layout = qPixelLayouts[rasterBuffer->format];
5876 quint32 c32 = color.toArgb32();
5877 quint24 c24;
5878 layout.storeFromARGB32PM(reinterpret_cast<uchar *>(&c24), &c32, 0, 1, nullptr, nullptr);
5879 qt_rectfill<quint24>(dest: reinterpret_cast<quint24 *>(rasterBuffer->buffer()),
5880 value: c24, x, y, width, height, stride: rasterBuffer->bytesPerLine());
5881}
5882
5883static void qt_rectfill_nonpremul_argb32(QRasterBuffer *rasterBuffer,
5884 int x, int y, int width, int height,
5885 const QRgba64 &color)
5886{
5887 qt_rectfill<quint32>(dest: reinterpret_cast<quint32 *>(rasterBuffer->buffer()),
5888 value: color.unpremultiplied().toArgb32(), x, y, width, height, stride: rasterBuffer->bytesPerLine());
5889}
5890
5891static void qt_rectfill_rgba(QRasterBuffer *rasterBuffer,
5892 int x, int y, int width, int height,
5893 const QRgba64 &color)
5894{
5895 qt_rectfill<quint32>(dest: reinterpret_cast<quint32 *>(rasterBuffer->buffer()),
5896 value: ARGB2RGBA(x: color.toArgb32()), x, y, width, height, stride: rasterBuffer->bytesPerLine());
5897}
5898
5899static void qt_rectfill_nonpremul_rgba(QRasterBuffer *rasterBuffer,
5900 int x, int y, int width, int height,
5901 const QRgba64 &color)
5902{
5903 qt_rectfill<quint32>(dest: reinterpret_cast<quint32 *>(rasterBuffer->buffer()),
5904 value: ARGB2RGBA(x: color.unpremultiplied().toArgb32()), x, y, width, height, stride: rasterBuffer->bytesPerLine());
5905}
5906
5907template<QtPixelOrder PixelOrder>
5908static void qt_rectfill_rgb30(QRasterBuffer *rasterBuffer,
5909 int x, int y, int width, int height,
5910 const QRgba64 &color)
5911{
5912 qt_rectfill<quint32>(reinterpret_cast<quint32 *>(rasterBuffer->buffer()),
5913 qConvertRgb64ToRgb30<PixelOrder>(color), x, y, width, height, rasterBuffer->bytesPerLine());
5914}
5915
5916static void qt_rectfill_alpha(QRasterBuffer *rasterBuffer,
5917 int x, int y, int width, int height,
5918 const QRgba64 &color)
5919{
5920 qt_rectfill<quint8>(dest: reinterpret_cast<quint8 *>(rasterBuffer->buffer()),
5921 value: color.alpha() >> 8, x, y, width, height, stride: rasterBuffer->bytesPerLine());
5922}
5923
5924static void qt_rectfill_gray(QRasterBuffer *rasterBuffer,
5925 int x, int y, int width, int height,
5926 const QRgba64 &color)
5927{
5928 qt_rectfill<quint8>(dest: reinterpret_cast<quint8 *>(rasterBuffer->buffer()),
5929 value: qGray(rgb: color.toArgb32()), x, y, width, height, stride: rasterBuffer->bytesPerLine());
5930}
5931
5932static void qt_rectfill_quint64(QRasterBuffer *rasterBuffer,
5933 int x, int y, int width, int height,
5934 const QRgba64 &color)
5935{
5936 const auto store = qStoreFromRGBA64PM[rasterBuffer->format];
5937 quint64 c64;
5938 store(reinterpret_cast<uchar *>(&c64), &color, 0, 1, nullptr, nullptr);
5939 qt_rectfill<quint64>(dest: reinterpret_cast<quint64 *>(rasterBuffer->buffer()),
5940 value: c64, x, y, width, height, stride: rasterBuffer->bytesPerLine());
5941}
5942
5943static void qt_rectfill_fp32x4(QRasterBuffer *rasterBuffer,
5944 int x, int y, int width, int height,
5945 const QRgba64 &color)
5946{
5947 const auto store = qStoreFromRGBA64PM[rasterBuffer->format];
5948 QRgbaFloat32 c;
5949 store(reinterpret_cast<uchar *>(&c), &color, 0, 1, nullptr, nullptr);
5950 qt_rectfill<QRgbaFloat32>(dest: reinterpret_cast<QRgbaFloat32 *>(rasterBuffer->buffer()),
5951 value: c, x, y, width, height, stride: rasterBuffer->bytesPerLine());
5952}
5953
5954// Map table for destination image format. Contains function pointers
5955// for blends of various types unto the destination
5956
5957DrawHelper qDrawHelper[QImage::NImageFormats] =
5958{
5959 // Format_Invalid,
5960 { .blendColor: nullptr, .bitmapBlit: nullptr, .alphamapBlit: nullptr, .alphaRGBBlit: nullptr, .fillRect: nullptr },
5961 // Format_Mono,
5962 {
5963 .blendColor: blend_color_generic,
5964 .bitmapBlit: nullptr, .alphamapBlit: nullptr, .alphaRGBBlit: nullptr, .fillRect: nullptr
5965 },
5966 // Format_MonoLSB,
5967 {
5968 .blendColor: blend_color_generic,
5969 .bitmapBlit: nullptr, .alphamapBlit: nullptr, .alphaRGBBlit: nullptr, .fillRect: nullptr
5970 },
5971 // Format_Indexed8,
5972 {
5973 .blendColor: blend_color_generic,
5974 .bitmapBlit: nullptr, .alphamapBlit: nullptr, .alphaRGBBlit: nullptr, .fillRect: nullptr
5975 },
5976 // Format_RGB32,
5977 {
5978 .blendColor: blend_color_argb,
5979 .bitmapBlit: qt_bitmapblit_argb32,
5980 .alphamapBlit: qt_alphamapblit_argb32,
5981 .alphaRGBBlit: qt_alphargbblit_argb32,
5982 .fillRect: qt_rectfill_argb32
5983 },
5984 // Format_ARGB32,
5985 {
5986 .blendColor: blend_color_generic,
5987 .bitmapBlit: qt_bitmapblit_argb32,
5988 .alphamapBlit: qt_alphamapblit_argb32,
5989 .alphaRGBBlit: qt_alphargbblit_argb32,
5990 .fillRect: qt_rectfill_nonpremul_argb32
5991 },
5992 // Format_ARGB32_Premultiplied
5993 {
5994 .blendColor: blend_color_argb,
5995 .bitmapBlit: qt_bitmapblit_argb32,
5996 .alphamapBlit: qt_alphamapblit_argb32,
5997 .alphaRGBBlit: qt_alphargbblit_argb32,
5998 .fillRect: qt_rectfill_argb32
5999 },
6000 // Format_RGB16
6001 {
6002 .blendColor: blend_color_generic,
6003 .bitmapBlit: qt_bitmapblit_quint16,
6004 .alphamapBlit: qt_alphamapblit_quint16,
6005 .alphaRGBBlit: qt_alphargbblit_generic,
6006 .fillRect: qt_rectfill_quint16
6007 },
6008 // Format_ARGB8565_Premultiplied
6009 {
6010 .blendColor: blend_color_generic,
6011 .bitmapBlit: nullptr,
6012 .alphamapBlit: qt_alphamapblit_generic,
6013 .alphaRGBBlit: qt_alphargbblit_generic,
6014 .fillRect: qt_rectfill_quint24
6015 },
6016 // Format_RGB666
6017 {
6018 .blendColor: blend_color_generic,
6019 .bitmapBlit: nullptr,
6020 .alphamapBlit: qt_alphamapblit_generic,
6021 .alphaRGBBlit: qt_alphargbblit_generic,
6022 .fillRect: qt_rectfill_quint24
6023 },
6024 // Format_ARGB6666_Premultiplied
6025 {
6026 .blendColor: blend_color_generic,
6027 .bitmapBlit: nullptr,
6028 .alphamapBlit: qt_alphamapblit_generic,
6029 .alphaRGBBlit: qt_alphargbblit_generic,
6030 .fillRect: qt_rectfill_quint24
6031 },
6032 // Format_RGB555
6033 {
6034 .blendColor: blend_color_generic,
6035 .bitmapBlit: nullptr,
6036 .alphamapBlit: qt_alphamapblit_generic,
6037 .alphaRGBBlit: qt_alphargbblit_generic,
6038 .fillRect: qt_rectfill_quint16
6039 },
6040 // Format_ARGB8555_Premultiplied
6041 {
6042 .blendColor: blend_color_generic,
6043 .bitmapBlit: nullptr,
6044 .alphamapBlit: qt_alphamapblit_generic,
6045 .alphaRGBBlit: qt_alphargbblit_generic,
6046 .fillRect: qt_rectfill_quint24
6047 },
6048 // Format_RGB888
6049 {
6050 .blendColor: blend_color_generic,
6051 .bitmapBlit: nullptr,
6052 .alphamapBlit: qt_alphamapblit_generic,
6053 .alphaRGBBlit: qt_alphargbblit_generic,
6054 .fillRect: qt_rectfill_quint24
6055 },
6056 // Format_RGB444
6057 {
6058 .blendColor: blend_color_generic,
6059 .bitmapBlit: nullptr,
6060 .alphamapBlit: qt_alphamapblit_generic,
6061 .alphaRGBBlit: qt_alphargbblit_generic,
6062 .fillRect: qt_rectfill_quint16
6063 },
6064 // Format_ARGB4444_Premultiplied
6065 {
6066 .blendColor: blend_color_generic,
6067 .bitmapBlit: nullptr,
6068 .alphamapBlit: qt_alphamapblit_generic,
6069 .alphaRGBBlit: qt_alphargbblit_generic,
6070 .fillRect: qt_rectfill_quint16
6071 },
6072 // Format_RGBX8888
6073 {
6074 .blendColor: blend_color_generic,
6075 .bitmapBlit: qt_bitmapblit_rgba8888,
6076 .alphamapBlit: qt_alphamapblit_generic,
6077 .alphaRGBBlit: qt_alphargbblit_generic,
6078 .fillRect: qt_rectfill_rgba
6079 },
6080 // Format_RGBA8888
6081 {
6082 .blendColor: blend_color_generic,
6083 .bitmapBlit: qt_bitmapblit_rgba8888,
6084 .alphamapBlit: qt_alphamapblit_generic,
6085 .alphaRGBBlit: qt_alphargbblit_generic,
6086 .fillRect: qt_rectfill_nonpremul_rgba
6087 },
6088 // Format_RGB8888_Premultiplied
6089 {
6090 .blendColor: blend_color_generic,
6091 .bitmapBlit: qt_bitmapblit_rgba8888,
6092 .alphamapBlit: qt_alphamapblit_generic,
6093 .alphaRGBBlit: qt_alphargbblit_generic,
6094 .fillRect: qt_rectfill_rgba
6095 },
6096 // Format_BGR30
6097 {
6098 .blendColor: blend_color_generic_rgb64,
6099 .bitmapBlit: qt_bitmapblit_rgb30<PixelOrderBGR>,
6100 .alphamapBlit: qt_alphamapblit_generic,
6101 .alphaRGBBlit: qt_alphargbblit_generic,
6102 .fillRect: qt_rectfill_rgb30<PixelOrderBGR>
6103 },
6104 // Format_A2BGR30_Premultiplied
6105 {
6106 .blendColor: blend_color_generic_rgb64,
6107 .bitmapBlit: qt_bitmapblit_rgb30<PixelOrderBGR>,
6108 .alphamapBlit: qt_alphamapblit_generic,
6109 .alphaRGBBlit: qt_alphargbblit_generic,
6110 .fillRect: qt_rectfill_rgb30<PixelOrderBGR>
6111 },
6112 // Format_RGB30
6113 {
6114 .blendColor: blend_color_generic_rgb64,
6115 .bitmapBlit: qt_bitmapblit_rgb30<PixelOrderRGB>,
6116 .alphamapBlit: qt_alphamapblit_generic,
6117 .alphaRGBBlit: qt_alphargbblit_generic,
6118 .fillRect: qt_rectfill_rgb30<PixelOrderRGB>
6119 },
6120 // Format_A2RGB30_Premultiplied
6121 {
6122 .blendColor: blend_color_generic_rgb64,
6123 .bitmapBlit: qt_bitmapblit_rgb30<PixelOrderRGB>,
6124 .alphamapBlit: qt_alphamapblit_generic,
6125 .alphaRGBBlit: qt_alphargbblit_generic,
6126 .fillRect: qt_rectfill_rgb30<PixelOrderRGB>
6127 },
6128 // Format_Alpha8
6129 {
6130 .blendColor: blend_color_generic,
6131 .bitmapBlit: nullptr,
6132 .alphamapBlit: qt_alphamapblit_generic,
6133 .alphaRGBBlit: qt_alphargbblit_generic,
6134 .fillRect: qt_rectfill_alpha
6135 },
6136 // Format_Grayscale8
6137 {
6138 .blendColor: blend_color_generic,
6139 .bitmapBlit: nullptr,
6140 .alphamapBlit: qt_alphamapblit_generic,
6141 .alphaRGBBlit: qt_alphargbblit_generic,
6142 .fillRect: qt_rectfill_gray
6143 },
6144 // Format_RGBX64
6145 {
6146 .blendColor: blend_color_generic_rgb64,
6147 .bitmapBlit: nullptr,
6148 .alphamapBlit: qt_alphamapblit_generic,
6149 .alphaRGBBlit: qt_alphargbblit_generic,
6150 .fillRect: qt_rectfill_quint64
6151 },
6152 // Format_RGBA64
6153 {
6154 .blendColor: blend_color_generic_rgb64,
6155 .bitmapBlit: nullptr,
6156 .alphamapBlit: qt_alphamapblit_generic,
6157 .alphaRGBBlit: qt_alphargbblit_generic,
6158 .fillRect: qt_rectfill_quint64
6159 },
6160 // Format_RGBA64_Premultiplied
6161 {
6162 .blendColor: blend_color_generic_rgb64,
6163 .bitmapBlit: nullptr,
6164 .alphamapBlit: qt_alphamapblit_generic,
6165 .alphaRGBBlit: qt_alphargbblit_generic,
6166 .fillRect: qt_rectfill_quint64
6167 },
6168 // Format_Grayscale16
6169 {
6170 .blendColor: blend_color_generic_rgb64,
6171 .bitmapBlit: nullptr,
6172 .alphamapBlit: qt_alphamapblit_generic,
6173 .alphaRGBBlit: qt_alphargbblit_generic,
6174 .fillRect: qt_rectfill_quint16
6175 },
6176 // Format_BGR888
6177 {
6178 .blendColor: blend_color_generic,
6179 .bitmapBlit: nullptr,
6180 .alphamapBlit: qt_alphamapblit_generic,
6181 .alphaRGBBlit: qt_alphargbblit_generic,
6182 .fillRect: qt_rectfill_quint24
6183 },
6184 // Format_RGBX16FPx4
6185 {
6186 .blendColor: blend_color_generic_fp,
6187 .bitmapBlit: nullptr,
6188 .alphamapBlit: qt_alphamapblit_generic,
6189 .alphaRGBBlit: qt_alphargbblit_generic,
6190 .fillRect: qt_rectfill_quint64
6191 },
6192 // Format_RGBA16FPx4
6193 {
6194 .blendColor: blend_color_generic_fp,
6195 .bitmapBlit: nullptr,
6196 .alphamapBlit: qt_alphamapblit_generic,
6197 .alphaRGBBlit: qt_alphargbblit_generic,
6198 .fillRect: qt_rectfill_quint64
6199 },
6200 // Format_RGBA16FPx4_Premultiplied
6201 {
6202 .blendColor: blend_color_generic_fp,
6203 .bitmapBlit: nullptr,
6204 .alphamapBlit: qt_alphamapblit_generic,
6205 .alphaRGBBlit: qt_alphargbblit_generic,
6206 .fillRect: qt_rectfill_quint64
6207 },
6208 // Format_RGBX32FPx4
6209 {
6210 .blendColor: blend_color_generic_fp,
6211 .bitmapBlit: nullptr,
6212 .alphamapBlit: qt_alphamapblit_generic,
6213 .alphaRGBBlit: qt_alphargbblit_generic,
6214 .fillRect: qt_rectfill_fp32x4
6215 },
6216 // Format_RGBA32FPx4
6217 {
6218 .blendColor: blend_color_generic_fp,
6219 .bitmapBlit: nullptr,
6220 .alphamapBlit: qt_alphamapblit_generic,
6221 .alphaRGBBlit: qt_alphargbblit_generic,
6222 .fillRect: qt_rectfill_fp32x4
6223 },
6224 // Format_RGBA32FPx4_Premultiplied
6225 {
6226 .blendColor: blend_color_generic_fp,
6227 .bitmapBlit: nullptr,
6228 .alphamapBlit: qt_alphamapblit_generic,
6229 .alphaRGBBlit: qt_alphargbblit_generic,
6230 .fillRect: qt_rectfill_fp32x4
6231 },
6232};
6233
6234#if !defined(Q_PROCESSOR_X86)
6235void qt_memfill64(quint64 *dest, quint64 color, qsizetype count)
6236{
6237 qt_memfill_template<quint64>(dest, color, count);
6238}
6239#endif
6240
6241#if defined(QT_COMPILER_SUPPORTS_SSSE3) && defined(Q_CC_GNU) && !defined(Q_CC_CLANG)
6242__attribute__((optimize("no-tree-vectorize")))
6243#endif
6244void qt_memfill24(quint24 *dest, quint24 color, qsizetype count)
6245{
6246# ifdef QT_COMPILER_SUPPORTS_SSSE3
6247 extern void qt_memfill24_ssse3(quint24 *, quint24, qsizetype);
6248 if (qCpuHasFeature(SSSE3))
6249 return qt_memfill24_ssse3(dest, color, count);
6250# endif
6251
6252 const quint32 v = color;
6253 quint24 *end = dest + count;
6254
6255 // prolog: align dest to 32bit
6256 while ((quintptr(dest) & 0x3) && dest < end) {
6257 *dest++ = v;
6258 }
6259 if (dest >= end)
6260 return;
6261
6262 const uint val1 = qFromBigEndian(source: (v << 8) | (v >> 16));
6263 const uint val2 = qFromBigEndian(source: (v << 16) | (v >> 8));
6264 const uint val3 = qFromBigEndian(source: (v << 24) | (v >> 0));
6265
6266 for ( ; dest <= (end - 4); dest += 4) {
6267 quint32 *dst = reinterpret_cast<quint32 *>(dest);
6268 dst[0] = val1;
6269 dst[1] = val2;
6270 dst[2] = val3;
6271 }
6272
6273 // less than 4px left
6274 switch (end - dest) {
6275 case 3:
6276 *dest++ = v;
6277 Q_FALLTHROUGH();
6278 case 2:
6279 *dest++ = v;
6280 Q_FALLTHROUGH();
6281 case 1:
6282 *dest++ = v;
6283 }
6284}
6285
6286void qt_memfill16(quint16 *dest, quint16 value, qsizetype count)
6287{
6288 const int align = quintptr(dest) & 0x3;
6289 if (align) {
6290 *dest++ = value;
6291 --count;
6292 }
6293
6294 if (count & 0x1)
6295 dest[count - 1] = value;
6296
6297 const quint32 value32 = (value << 16) | value;
6298 qt_memfill32(reinterpret_cast<quint32*>(dest), value32, count / 2);
6299}
6300
6301#if defined(Q_PROCESSOR_X86)
6302void (*qt_memfill32)(quint32 *dest, quint32 value, qsizetype count) = nullptr;
6303void (*qt_memfill64)(quint64 *dest, quint64 value, qsizetype count) = nullptr;
6304#elif !defined(__ARM_NEON__) && !defined(__MIPS_DSP__)
6305void qt_memfill32(quint32 *dest, quint32 color, qsizetype count)
6306{
6307 qt_memfill_template<quint32>(dest, color, count);
6308}
6309#endif
6310
#ifdef QT_COMPILER_SUPPORTS_SSE4_1
// Forward declaration of the SSE4.1 store routine that
// qInitDrawhelperFunctions() installs (instantiated for PixelOrderBGR and
// PixelOrderRGB) as storeFromARGB32PM for the A2BGR30/A2RGB30 premultiplied
// formats. The definition is not part of this section of the file.
template<QtPixelOrder>
void QT_FASTCALL storeA2RGB30PMFromARGB32PM_sse4(uchar *dest, const uint *src, int index, int count,
                                                 const QList<QRgb> *, QDitherInfo *);
#endif

// Called first by qInitDrawhelperFunctions(); the definition is not part of
// this section of the file.
extern void qInitBlendFunctions();
6316
6317static void qInitDrawhelperFunctions()
6318{
6319 // Set up basic blend function tables.
6320 qInitBlendFunctions();
6321
6322#if defined(Q_PROCESSOR_X86) && !defined(__SSE2__)
6323 qt_memfill32 = qt_memfill_template<quint32>;
6324 qt_memfill64 = qt_memfill_template<quint64>;
6325#elif defined(__SSE2__)
6326# ifndef __haswell__
6327 qt_memfill32 = qt_memfill32_sse2;
6328 qt_memfill64 = qt_memfill64_sse2;
6329# endif
6330 qDrawHelper[QImage::Format_RGB32].bitmapBlit = qt_bitmapblit32_sse2;
6331 qDrawHelper[QImage::Format_ARGB32].bitmapBlit = qt_bitmapblit32_sse2;
6332 qDrawHelper[QImage::Format_ARGB32_Premultiplied].bitmapBlit = qt_bitmapblit32_sse2;
6333 qDrawHelper[QImage::Format_RGB16].bitmapBlit = qt_bitmapblit16_sse2;
6334 qDrawHelper[QImage::Format_RGBX8888].bitmapBlit = qt_bitmapblit8888_sse2;
6335 qDrawHelper[QImage::Format_RGBA8888].bitmapBlit = qt_bitmapblit8888_sse2;
6336 qDrawHelper[QImage::Format_RGBA8888_Premultiplied].bitmapBlit = qt_bitmapblit8888_sse2;
6337
6338 extern void qt_scale_image_argb32_on_argb32_sse2(uchar *destPixels, int dbpl,
6339 const uchar *srcPixels, int sbpl, int srch,
6340 const QRectF &targetRect,
6341 const QRectF &sourceRect,
6342 const QRect &clip,
6343 int const_alpha);
6344 qScaleFunctions[QImage::Format_ARGB32_Premultiplied][QImage::Format_ARGB32_Premultiplied] = qt_scale_image_argb32_on_argb32_sse2;
6345 qScaleFunctions[QImage::Format_RGB32][QImage::Format_ARGB32_Premultiplied] = qt_scale_image_argb32_on_argb32_sse2;
6346 qScaleFunctions[QImage::Format_RGBA8888_Premultiplied][QImage::Format_RGBA8888_Premultiplied] = qt_scale_image_argb32_on_argb32_sse2;
6347 qScaleFunctions[QImage::Format_RGBX8888][QImage::Format_RGBA8888_Premultiplied] = qt_scale_image_argb32_on_argb32_sse2;
6348
6349 extern void qt_blend_rgb32_on_rgb32_sse2(uchar *destPixels, int dbpl,
6350 const uchar *srcPixels, int sbpl,
6351 int w, int h,
6352 int const_alpha);
6353 extern void qt_blend_argb32_on_argb32_sse2(uchar *destPixels, int dbpl,
6354 const uchar *srcPixels, int sbpl,
6355 int w, int h,
6356 int const_alpha);
6357
6358 qBlendFunctions[QImage::Format_RGB32][QImage::Format_RGB32] = qt_blend_rgb32_on_rgb32_sse2;
6359 qBlendFunctions[QImage::Format_ARGB32_Premultiplied][QImage::Format_RGB32] = qt_blend_rgb32_on_rgb32_sse2;
6360 qBlendFunctions[QImage::Format_RGB32][QImage::Format_ARGB32_Premultiplied] = qt_blend_argb32_on_argb32_sse2;
6361 qBlendFunctions[QImage::Format_ARGB32_Premultiplied][QImage::Format_ARGB32_Premultiplied] = qt_blend_argb32_on_argb32_sse2;
6362 qBlendFunctions[QImage::Format_RGBX8888][QImage::Format_RGBX8888] = qt_blend_rgb32_on_rgb32_sse2;
6363 qBlendFunctions[QImage::Format_RGBA8888_Premultiplied][QImage::Format_RGBX8888] = qt_blend_rgb32_on_rgb32_sse2;
6364 qBlendFunctions[QImage::Format_RGBX8888][QImage::Format_RGBA8888_Premultiplied] = qt_blend_argb32_on_argb32_sse2;
6365 qBlendFunctions[QImage::Format_RGBA8888_Premultiplied][QImage::Format_RGBA8888_Premultiplied] = qt_blend_argb32_on_argb32_sse2;
6366
6367 extern const uint * QT_FASTCALL qt_fetch_radial_gradient_sse2(uint *buffer, const Operator *op, const QSpanData *data,
6368 int y, int x, int length);
6369
6370 qt_fetch_radial_gradient = qt_fetch_radial_gradient_sse2;
6371
6372 extern void QT_FASTCALL comp_func_SourceOver_sse2(uint *destPixels, const uint *srcPixels, int length, uint const_alpha);
6373 extern void QT_FASTCALL comp_func_solid_SourceOver_sse2(uint *destPixels, int length, uint color, uint const_alpha);
6374 extern void QT_FASTCALL comp_func_Source_sse2(uint *destPixels, const uint *srcPixels, int length, uint const_alpha);
6375 extern void QT_FASTCALL comp_func_solid_Source_sse2(uint *destPixels, int length, uint color, uint const_alpha);
6376 extern void QT_FASTCALL comp_func_Plus_sse2(uint *destPixels, const uint *srcPixels, int length, uint const_alpha);
6377 qt_functionForMode_C[QPainter::CompositionMode_SourceOver] = comp_func_SourceOver_sse2;
6378 qt_functionForModeSolid_C[QPainter::CompositionMode_SourceOver] = comp_func_solid_SourceOver_sse2;
6379 qt_functionForMode_C[QPainter::CompositionMode_Source] = comp_func_Source_sse2;
6380 qt_functionForModeSolid_C[QPainter::CompositionMode_Source] = comp_func_solid_Source_sse2;
6381 qt_functionForMode_C[QPainter::CompositionMode_Plus] = comp_func_Plus_sse2;
6382
6383#ifdef QT_COMPILER_SUPPORTS_SSSE3
6384 if (qCpuHasFeature(SSSE3)) {
6385 extern void qt_blend_argb32_on_argb32_ssse3(uchar *destPixels, int dbpl,
6386 const uchar *srcPixels, int sbpl,
6387 int w, int h,
6388 int const_alpha);
6389
6390 extern const uint * QT_FASTCALL qt_fetchUntransformed_888_ssse3(uint *buffer, const Operator *, const QSpanData *data,
6391 int y, int x, int length);
6392 qBlendFunctions[QImage::Format_RGB32][QImage::Format_ARGB32_Premultiplied] = qt_blend_argb32_on_argb32_ssse3;
6393 qBlendFunctions[QImage::Format_ARGB32_Premultiplied][QImage::Format_ARGB32_Premultiplied] = qt_blend_argb32_on_argb32_ssse3;
6394 qBlendFunctions[QImage::Format_RGBX8888][QImage::Format_RGBA8888_Premultiplied] = qt_blend_argb32_on_argb32_ssse3;
6395 qBlendFunctions[QImage::Format_RGBA8888_Premultiplied][QImage::Format_RGBA8888_Premultiplied] = qt_blend_argb32_on_argb32_ssse3;
6396 sourceFetchUntransformed[QImage::Format_RGB888] = qt_fetchUntransformed_888_ssse3;
6397 extern void QT_FASTCALL rbSwap_888_ssse3(uchar *dst, const uchar *src, int count);
6398 qPixelLayouts[QImage::Format_RGB888].rbSwap = rbSwap_888_ssse3;
6399 qPixelLayouts[QImage::Format_BGR888].rbSwap = rbSwap_888_ssse3;
6400 }
6401#endif // SSSE3
6402
6403#if defined(QT_COMPILER_SUPPORTS_SSE4_1)
6404 if (qCpuHasFeature(SSE4_1)) {
6405 extern void QT_FASTCALL convertARGB32ToARGB32PM_sse4(uint *buffer, int count, const QList<QRgb> *);
6406 extern void QT_FASTCALL convertRGBA8888ToARGB32PM_sse4(uint *buffer, int count, const QList<QRgb> *);
6407 extern const uint *QT_FASTCALL fetchARGB32ToARGB32PM_sse4(uint *buffer, const uchar *src, int index, int count,
6408 const QList<QRgb> *, QDitherInfo *);
6409 extern const uint *QT_FASTCALL fetchRGBA8888ToARGB32PM_sse4(uint *buffer, const uchar *src, int index, int count,
6410 const QList<QRgb> *, QDitherInfo *);
6411 extern const QRgba64 * QT_FASTCALL convertARGB32ToRGBA64PM_sse4(QRgba64 *buffer, const uint *src, int count,
6412 const QList<QRgb> *, QDitherInfo *);
6413 extern const QRgba64 * QT_FASTCALL convertRGBA8888ToRGBA64PM_sse4(QRgba64 *buffer, const uint *src, int count,
6414 const QList<QRgb> *, QDitherInfo *);
6415 extern const QRgba64 *QT_FASTCALL fetchARGB32ToRGBA64PM_sse4(QRgba64 *buffer, const uchar *src, int index, int count,
6416 const QList<QRgb> *, QDitherInfo *);
6417 extern const QRgba64 *QT_FASTCALL fetchRGBA8888ToRGBA64PM_sse4(QRgba64 *buffer, const uchar *src, int index, int count,
6418 const QList<QRgb> *, QDitherInfo *);
6419 extern void QT_FASTCALL storeARGB32FromARGB32PM_sse4(uchar *dest, const uint *src, int index, int count,
6420 const QList<QRgb> *, QDitherInfo *);
6421 extern void QT_FASTCALL storeRGBA8888FromARGB32PM_sse4(uchar *dest, const uint *src, int index, int count,
6422 const QList<QRgb> *, QDitherInfo *);
6423 extern void QT_FASTCALL storeRGBXFromARGB32PM_sse4(uchar *dest, const uint *src, int index, int count,
6424 const QList<QRgb> *, QDitherInfo *);
6425 extern void QT_FASTCALL storeARGB32FromRGBA64PM_sse4(uchar *dest, const QRgba64 *src, int index, int count,
6426 const QList<QRgb> *, QDitherInfo *);
6427 extern void QT_FASTCALL storeRGBA8888FromRGBA64PM_sse4(uchar *dest, const QRgba64 *src, int index, int count,
6428 const QList<QRgb> *, QDitherInfo *);
6429 extern void QT_FASTCALL storeRGBA64FromRGBA64PM_sse4(uchar *, const QRgba64 *, int, int, const QList<QRgb> *, QDitherInfo *);
6430 extern void QT_FASTCALL storeRGBx64FromRGBA64PM_sse4(uchar *, const QRgba64 *, int, int, const QList<QRgb> *, QDitherInfo *);
6431 extern void QT_FASTCALL destStore64ARGB32_sse4(QRasterBuffer *rasterBuffer, int x, int y, const QRgba64 *buffer, int length);
6432 extern void QT_FASTCALL destStore64RGBA8888_sse4(QRasterBuffer *rasterBuffer, int x, int y, const QRgba64 *buffer, int length);
6433# ifndef __haswell__
6434 qPixelLayouts[QImage::Format_ARGB32].fetchToARGB32PM = fetchARGB32ToARGB32PM_sse4;
6435 qPixelLayouts[QImage::Format_ARGB32].convertToARGB32PM = convertARGB32ToARGB32PM_sse4;
6436 qPixelLayouts[QImage::Format_RGBA8888].fetchToARGB32PM = fetchRGBA8888ToARGB32PM_sse4;
6437 qPixelLayouts[QImage::Format_RGBA8888].convertToARGB32PM = convertRGBA8888ToARGB32PM_sse4;
6438 qPixelLayouts[QImage::Format_ARGB32].fetchToRGBA64PM = fetchARGB32ToRGBA64PM_sse4;
6439 qPixelLayouts[QImage::Format_ARGB32].convertToRGBA64PM = convertARGB32ToRGBA64PM_sse4;
6440 qPixelLayouts[QImage::Format_RGBA8888].fetchToRGBA64PM = fetchRGBA8888ToRGBA64PM_sse4;
6441 qPixelLayouts[QImage::Format_RGBA8888].convertToRGBA64PM = convertRGBA8888ToRGBA64PM_sse4;
6442 qPixelLayouts[QImage::Format_RGBX8888].fetchToRGBA64PM = fetchRGBA8888ToRGBA64PM_sse4;
6443 qPixelLayouts[QImage::Format_RGBX8888].convertToRGBA64PM = convertRGBA8888ToRGBA64PM_sse4;
6444# endif
6445 qPixelLayouts[QImage::Format_ARGB32].storeFromARGB32PM = storeARGB32FromARGB32PM_sse4;
6446 qPixelLayouts[QImage::Format_RGBA8888].storeFromARGB32PM = storeRGBA8888FromARGB32PM_sse4;
6447 qPixelLayouts[QImage::Format_RGBX8888].storeFromARGB32PM = storeRGBXFromARGB32PM_sse4;
6448 qPixelLayouts[QImage::Format_A2BGR30_Premultiplied].storeFromARGB32PM = storeA2RGB30PMFromARGB32PM_sse4<PixelOrderBGR>;
6449 qPixelLayouts[QImage::Format_A2RGB30_Premultiplied].storeFromARGB32PM = storeA2RGB30PMFromARGB32PM_sse4<PixelOrderRGB>;
6450 qStoreFromRGBA64PM[QImage::Format_ARGB32] = storeARGB32FromRGBA64PM_sse4;
6451 qStoreFromRGBA64PM[QImage::Format_RGBA8888] = storeRGBA8888FromRGBA64PM_sse4;
6452 qStoreFromRGBA64PM[QImage::Format_RGBX64] = storeRGBx64FromRGBA64PM_sse4;
6453 qStoreFromRGBA64PM[QImage::Format_RGBA64] = storeRGBA64FromRGBA64PM_sse4;
6454#if QT_CONFIG(raster_64bit)
6455 destStoreProc64[QImage::Format_ARGB32] = destStore64ARGB32_sse4;
6456 destStoreProc64[QImage::Format_RGBA8888] = destStore64RGBA8888_sse4;
6457#endif
6458#if QT_CONFIG(raster_fp)
6459 extern const QRgbaFloat32 *QT_FASTCALL fetchRGBA32FToRGBA32F_sse4(QRgbaFloat32 *buffer, const uchar *src, int index, int count, const QList<QRgb> *, QDitherInfo *);
6460 extern void QT_FASTCALL storeRGBX32FFromRGBA32F_sse4(uchar *dest, const QRgbaFloat32 *src, int index, int count, const QList<QRgb> *, QDitherInfo *);
6461 extern void QT_FASTCALL storeRGBA32FFromRGBA32F_sse4(uchar *dest, const QRgbaFloat32 *src, int index, int count, const QList<QRgb> *, QDitherInfo *);
6462 qFetchToRGBA32F[QImage::Format_RGBA32FPx4] = fetchRGBA32FToRGBA32F_sse4;
6463 qStoreFromRGBA32F[QImage::Format_RGBX32FPx4] = storeRGBX32FFromRGBA32F_sse4;
6464 qStoreFromRGBA32F[QImage::Format_RGBA32FPx4] = storeRGBA32FFromRGBA32F_sse4;
6465#endif // QT_CONFIG(raster_fp)
6466 }
6467#endif
6468
6469#if defined(QT_COMPILER_SUPPORTS_AVX2)
6470 if (qCpuHasFeature(ArchHaswell)) {
6471 qt_memfill32 = qt_memfill32_avx2;
6472 qt_memfill64 = qt_memfill64_avx2;
6473 extern void qt_blend_rgb32_on_rgb32_avx2(uchar *destPixels, int dbpl,
6474 const uchar *srcPixels, int sbpl,
6475 int w, int h, int const_alpha);
6476 extern void qt_blend_argb32_on_argb32_avx2(uchar *destPixels, int dbpl,
6477 const uchar *srcPixels, int sbpl,
6478 int w, int h, int const_alpha);
6479 qBlendFunctions[QImage::Format_RGB32][QImage::Format_RGB32] = qt_blend_rgb32_on_rgb32_avx2;
6480 qBlendFunctions[QImage::Format_ARGB32_Premultiplied][QImage::Format_RGB32] = qt_blend_rgb32_on_rgb32_avx2;
6481 qBlendFunctions[QImage::Format_RGB32][QImage::Format_ARGB32_Premultiplied] = qt_blend_argb32_on_argb32_avx2;
6482 qBlendFunctions[QImage::Format_ARGB32_Premultiplied][QImage::Format_ARGB32_Premultiplied] = qt_blend_argb32_on_argb32_avx2;
6483 qBlendFunctions[QImage::Format_RGBX8888][QImage::Format_RGBX8888] = qt_blend_rgb32_on_rgb32_avx2;
6484 qBlendFunctions[QImage::Format_RGBA8888_Premultiplied][QImage::Format_RGBX8888] = qt_blend_rgb32_on_rgb32_avx2;
6485 qBlendFunctions[QImage::Format_RGBX8888][QImage::Format_RGBA8888_Premultiplied] = qt_blend_argb32_on_argb32_avx2;
6486 qBlendFunctions[QImage::Format_RGBA8888_Premultiplied][QImage::Format_RGBA8888_Premultiplied] = qt_blend_argb32_on_argb32_avx2;
6487
6488 extern void QT_FASTCALL comp_func_Source_avx2(uint *destPixels, const uint *srcPixels, int length, uint const_alpha);
6489 extern void QT_FASTCALL comp_func_SourceOver_avx2(uint *destPixels, const uint *srcPixels, int length, uint const_alpha);
6490 extern void QT_FASTCALL comp_func_solid_SourceOver_avx2(uint *destPixels, int length, uint color, uint const_alpha);
6491 qt_functionForMode_C[QPainter::CompositionMode_Source] = comp_func_Source_avx2;
6492 qt_functionForMode_C[QPainter::CompositionMode_SourceOver] = comp_func_SourceOver_avx2;
6493 qt_functionForModeSolid_C[QPainter::CompositionMode_SourceOver] = comp_func_solid_SourceOver_avx2;
6494#if QT_CONFIG(raster_64bit)
6495 extern void QT_FASTCALL comp_func_Source_rgb64_avx2(QRgba64 *destPixels, const QRgba64 *srcPixels, int length, uint const_alpha);
6496 extern void QT_FASTCALL comp_func_SourceOver_rgb64_avx2(QRgba64 *destPixels, const QRgba64 *srcPixels, int length, uint const_alpha);
6497 extern void QT_FASTCALL comp_func_solid_SourceOver_rgb64_avx2(QRgba64 *destPixels, int length, QRgba64 color, uint const_alpha);
6498 qt_functionForMode64_C[QPainter::CompositionMode_Source] = comp_func_Source_rgb64_avx2;
6499 qt_functionForMode64_C[QPainter::CompositionMode_SourceOver] = comp_func_SourceOver_rgb64_avx2;
6500 qt_functionForModeSolid64_C[QPainter::CompositionMode_SourceOver] = comp_func_solid_SourceOver_rgb64_avx2;
6501#endif
6502#if QT_CONFIG(raster_fp)
6503 extern void QT_FASTCALL comp_func_Source_rgbafp_avx2(QRgbaFloat32 *destPixels, const QRgbaFloat32 *srcPixels, int length, uint const_alpha);
6504 extern void QT_FASTCALL comp_func_SourceOver_rgbafp_avx2(QRgbaFloat32 *destPixels, const QRgbaFloat32 *srcPixels, int length, uint const_alpha);
6505 extern void QT_FASTCALL comp_func_solid_Source_rgbafp_avx2(QRgbaFloat32 *destPixels, int length, QRgbaFloat32 color, uint const_alpha);
6506 extern void QT_FASTCALL comp_func_solid_SourceOver_rgbafp_avx2(QRgbaFloat32 *destPixels, int length, QRgbaFloat32 color, uint const_alpha);
6507 qt_functionForModeFP_C[QPainter::CompositionMode_Source] = comp_func_Source_rgbafp_avx2;
6508 qt_functionForModeFP_C[QPainter::CompositionMode_SourceOver] = comp_func_SourceOver_rgbafp_avx2;
6509 qt_functionForModeSolidFP_C[QPainter::CompositionMode_Source] = comp_func_solid_Source_rgbafp_avx2;
6510 qt_functionForModeSolidFP_C[QPainter::CompositionMode_SourceOver] = comp_func_solid_SourceOver_rgbafp_avx2;
6511#endif
6512
6513 extern void QT_FASTCALL fetchTransformedBilinearARGB32PM_simple_scale_helper_avx2(uint *b, uint *end, const QTextureData &image,
6514 int &fx, int &fy, int fdx, int /*fdy*/);
6515 extern void QT_FASTCALL fetchTransformedBilinearARGB32PM_downscale_helper_avx2(uint *b, uint *end, const QTextureData &image,
6516 int &fx, int &fy, int fdx, int /*fdy*/);
6517 extern void QT_FASTCALL fetchTransformedBilinearARGB32PM_fast_rotate_helper_avx2(uint *b, uint *end, const QTextureData &image,
6518 int &fx, int &fy, int fdx, int fdy);
6519
6520 bilinearFastTransformHelperARGB32PM[0][SimpleScaleTransform] = fetchTransformedBilinearARGB32PM_simple_scale_helper_avx2;
6521 bilinearFastTransformHelperARGB32PM[0][DownscaleTransform] = fetchTransformedBilinearARGB32PM_downscale_helper_avx2;
6522 bilinearFastTransformHelperARGB32PM[0][FastRotateTransform] = fetchTransformedBilinearARGB32PM_fast_rotate_helper_avx2;
6523
6524 extern void QT_FASTCALL convertARGB32ToARGB32PM_avx2(uint *buffer, int count, const QList<QRgb> *);
6525 extern void QT_FASTCALL convertRGBA8888ToARGB32PM_avx2(uint *buffer, int count, const QList<QRgb> *);
6526 extern const uint *QT_FASTCALL fetchARGB32ToARGB32PM_avx2(uint *buffer, const uchar *src, int index, int count,
6527 const QList<QRgb> *, QDitherInfo *);
6528 extern const uint *QT_FASTCALL fetchRGBA8888ToARGB32PM_avx2(uint *buffer, const uchar *src, int index, int count,
6529 const QList<QRgb> *, QDitherInfo *);
6530 qPixelLayouts[QImage::Format_ARGB32].fetchToARGB32PM = fetchARGB32ToARGB32PM_avx2;
6531 qPixelLayouts[QImage::Format_ARGB32].convertToARGB32PM = convertARGB32ToARGB32PM_avx2;
6532 qPixelLayouts[QImage::Format_RGBA8888].fetchToARGB32PM = fetchRGBA8888ToARGB32PM_avx2;
6533 qPixelLayouts[QImage::Format_RGBA8888].convertToARGB32PM = convertRGBA8888ToARGB32PM_avx2;
6534
6535 extern const QRgba64 *QT_FASTCALL convertARGB32ToRGBA64PM_avx2(QRgba64 *, const uint *, int, const QList<QRgb> *, QDitherInfo *);
6536 extern const QRgba64 *QT_FASTCALL convertRGBA8888ToRGBA64PM_avx2(QRgba64 *, const uint *, int count, const QList<QRgb> *, QDitherInfo *);
6537 extern const QRgba64 *QT_FASTCALL fetchARGB32ToRGBA64PM_avx2(QRgba64 *, const uchar *, int, int, const QList<QRgb> *, QDitherInfo *);
6538 extern const QRgba64 *QT_FASTCALL fetchRGBA8888ToRGBA64PM_avx2(QRgba64 *, const uchar *, int, int, const QList<QRgb> *, QDitherInfo *);
6539 extern const QRgba64 *QT_FASTCALL fetchRGBA64ToRGBA64PM_avx2(QRgba64 *buffer, const uchar *src, int index, int count, const QList<QRgb> *, QDitherInfo *);
6540 qPixelLayouts[QImage::Format_ARGB32].convertToRGBA64PM = convertARGB32ToRGBA64PM_avx2;
6541 qPixelLayouts[QImage::Format_RGBX8888].convertToRGBA64PM = convertRGBA8888ToRGBA64PM_avx2;
6542 qPixelLayouts[QImage::Format_ARGB32].fetchToRGBA64PM = fetchARGB32ToRGBA64PM_avx2;
6543 qPixelLayouts[QImage::Format_RGBX8888].fetchToRGBA64PM = fetchRGBA8888ToRGBA64PM_avx2;
6544 qPixelLayouts[QImage::Format_RGBA64].fetchToRGBA64PM = fetchRGBA64ToRGBA64PM_avx2;
6545
6546 extern const uint *QT_FASTCALL fetchRGB16FToRGB32_avx2(uint *buffer, const uchar *src, int index, int count, const QList<QRgb> *, QDitherInfo *);
6547 extern const uint *QT_FASTCALL fetchRGBA16FToARGB32PM_avx2(uint *buffer, const uchar *src, int index, int count, const QList<QRgb> *, QDitherInfo *);
6548 extern const QRgba64 *QT_FASTCALL fetchRGBA16FPMToRGBA64PM_avx2(QRgba64 *buffer, const uchar *src, int index, int count, const QList<QRgb> *, QDitherInfo *);
6549 extern const QRgba64 *QT_FASTCALL fetchRGBA16FToRGBA64PM_avx2(QRgba64 *buffer, const uchar *src, int index, int count, const QList<QRgb> *, QDitherInfo *);
6550 extern void QT_FASTCALL storeRGB16FFromRGB32_avx2(uchar *dest, const uint *src, int index, int count, const QList<QRgb> *, QDitherInfo *);
6551 extern void QT_FASTCALL storeRGBA16FFromARGB32PM_avx2(uchar *dest, const uint *src, int index, int count, const QList<QRgb> *, QDitherInfo *);
6552 qPixelLayouts[QImage::Format_RGBX16FPx4].fetchToARGB32PM = fetchRGB16FToRGB32_avx2;
6553 qPixelLayouts[QImage::Format_RGBX16FPx4].fetchToRGBA64PM = fetchRGBA16FPMToRGBA64PM_avx2;
6554 qPixelLayouts[QImage::Format_RGBX16FPx4].storeFromARGB32PM = storeRGB16FFromRGB32_avx2;
6555 qPixelLayouts[QImage::Format_RGBX16FPx4].storeFromRGB32 = storeRGB16FFromRGB32_avx2;
6556 qPixelLayouts[QImage::Format_RGBA16FPx4].fetchToARGB32PM = fetchRGBA16FToARGB32PM_avx2;
6557 qPixelLayouts[QImage::Format_RGBA16FPx4].fetchToRGBA64PM = fetchRGBA16FToRGBA64PM_avx2;
6558 qPixelLayouts[QImage::Format_RGBA16FPx4].storeFromARGB32PM = storeRGBA16FFromARGB32PM_avx2;
6559 qPixelLayouts[QImage::Format_RGBA16FPx4].storeFromRGB32 = storeRGB16FFromRGB32_avx2;
6560 qPixelLayouts[QImage::Format_RGBA16FPx4_Premultiplied].fetchToARGB32PM = fetchRGB16FToRGB32_avx2;
6561 qPixelLayouts[QImage::Format_RGBA16FPx4_Premultiplied].fetchToRGBA64PM = fetchRGBA16FPMToRGBA64PM_avx2;
6562 qPixelLayouts[QImage::Format_RGBA16FPx4_Premultiplied].storeFromARGB32PM = storeRGB16FFromRGB32_avx2;
6563 qPixelLayouts[QImage::Format_RGBA16FPx4_Premultiplied].storeFromRGB32 = storeRGB16FFromRGB32_avx2;
6564#if QT_CONFIG(raster_fp)
6565 extern const QRgbaFloat32 *QT_FASTCALL fetchRGBA16FToRGBA32F_avx2(QRgbaFloat32 *buffer, const uchar *src, int index, int count, const QList<QRgb> *, QDitherInfo *);
6566 extern void QT_FASTCALL storeRGBX16FFromRGBA32F_avx2(uchar *dest, const QRgbaFloat32 *src, int index, int count, const QList<QRgb> *, QDitherInfo *);
6567 extern void QT_FASTCALL storeRGBA16FFromRGBA32F_avx2(uchar *dest, const QRgbaFloat32 *src, int index, int count, const QList<QRgb> *, QDitherInfo *);
6568 qFetchToRGBA32F[QImage::Format_RGBA16FPx4] = fetchRGBA16FToRGBA32F_avx2;
6569 qStoreFromRGBA32F[QImage::Format_RGBX16FPx4] = storeRGBX16FFromRGBA32F_avx2;
6570 qStoreFromRGBA32F[QImage::Format_RGBA16FPx4] = storeRGBA16FFromRGBA32F_avx2;
6571#endif // QT_CONFIG(raster_fp)
6572 }
6573
6574#endif
6575
6576#endif // SSE2
6577
6578#if defined(__ARM_NEON__)
6579 qBlendFunctions[QImage::Format_RGB32][QImage::Format_RGB32] = qt_blend_rgb32_on_rgb32_neon;
6580 qBlendFunctions[QImage::Format_ARGB32_Premultiplied][QImage::Format_RGB32] = qt_blend_rgb32_on_rgb32_neon;
6581 qBlendFunctions[QImage::Format_RGB32][QImage::Format_ARGB32_Premultiplied] = qt_blend_argb32_on_argb32_neon;
6582 qBlendFunctions[QImage::Format_ARGB32_Premultiplied][QImage::Format_ARGB32_Premultiplied] = qt_blend_argb32_on_argb32_neon;
6583#if Q_BYTE_ORDER == Q_LITTLE_ENDIAN
6584 qBlendFunctions[QImage::Format_RGBX8888][QImage::Format_RGBX8888] = qt_blend_rgb32_on_rgb32_neon;
6585 qBlendFunctions[QImage::Format_RGBA8888_Premultiplied][QImage::Format_RGBX8888] = qt_blend_rgb32_on_rgb32_neon;
6586 qBlendFunctions[QImage::Format_RGBX8888][QImage::Format_RGBA8888_Premultiplied] = qt_blend_argb32_on_argb32_neon;
6587 qBlendFunctions[QImage::Format_RGBA8888_Premultiplied][QImage::Format_RGBA8888_Premultiplied] = qt_blend_argb32_on_argb32_neon;
6588#endif
6589
6590 qt_functionForMode_C[QPainter::CompositionMode_SourceOver] = qt_blend_argb32_on_argb32_scanline_neon;
6591 qt_functionForModeSolid_C[QPainter::CompositionMode_SourceOver] = comp_func_solid_SourceOver_neon;
6592 qt_functionForMode_C[QPainter::CompositionMode_Plus] = comp_func_Plus_neon;
6593
6594 extern const uint * QT_FASTCALL qt_fetch_radial_gradient_neon(uint *buffer, const Operator *op, const QSpanData *data,
6595 int y, int x, int length);
6596
6597 qt_fetch_radial_gradient = qt_fetch_radial_gradient_neon;
6598
6599 sourceFetchUntransformed[QImage::Format_RGB888] = qt_fetchUntransformed_888_neon;
6600
6601#if Q_BYTE_ORDER == Q_LITTLE_ENDIAN
6602 extern void QT_FASTCALL convertARGB32ToARGB32PM_neon(uint *buffer, int count, const QList<QRgb> *);
6603 extern void QT_FASTCALL convertRGBA8888ToARGB32PM_neon(uint *buffer, int count, const QList<QRgb> *);
6604 extern const uint *QT_FASTCALL fetchARGB32ToARGB32PM_neon(uint *buffer, const uchar *src, int index, int count,
6605 const QList<QRgb> *, QDitherInfo *);
6606 extern const uint *QT_FASTCALL fetchRGBA8888ToARGB32PM_neon(uint *buffer, const uchar *src, int index, int count,
6607 const QList<QRgb> *, QDitherInfo *);
6608 extern const QRgba64 * QT_FASTCALL convertARGB32ToRGBA64PM_neon(QRgba64 *buffer, const uint *src, int count,
6609 const QList<QRgb> *, QDitherInfo *);
6610 extern const QRgba64 * QT_FASTCALL convertRGBA8888ToRGBA64PM_neon(QRgba64 *buffer, const uint *src, int count,
6611 const QList<QRgb> *, QDitherInfo *);
6612 extern const QRgba64 *QT_FASTCALL fetchARGB32ToRGBA64PM_neon(QRgba64 *buffer, const uchar *src, int index, int count,
6613 const QList<QRgb> *, QDitherInfo *);
6614 extern const QRgba64 *QT_FASTCALL fetchRGBA8888ToRGBA64PM_neon(QRgba64 *buffer, const uchar *src, int index, int count,
6615 const QList<QRgb> *, QDitherInfo *);
6616 extern void QT_FASTCALL storeARGB32FromARGB32PM_neon(uchar *dest, const uint *src, int index, int count,
6617 const QList<QRgb> *, QDitherInfo *);
6618 extern void QT_FASTCALL storeRGBA8888FromARGB32PM_neon(uchar *dest, const uint *src, int index, int count,
6619 const QList<QRgb> *, QDitherInfo *);
6620 extern void QT_FASTCALL storeRGBXFromARGB32PM_neon(uchar *dest, const uint *src, int index, int count,
6621 const QList<QRgb> *, QDitherInfo *);
6622 qPixelLayouts[QImage::Format_ARGB32].fetchToARGB32PM = fetchARGB32ToARGB32PM_neon;
6623 qPixelLayouts[QImage::Format_ARGB32].convertToARGB32PM = convertARGB32ToARGB32PM_neon;
6624 qPixelLayouts[QImage::Format_ARGB32].storeFromARGB32PM = storeARGB32FromARGB32PM_neon;
6625 qPixelLayouts[QImage::Format_ARGB32].fetchToRGBA64PM = fetchARGB32ToRGBA64PM_neon;
6626 qPixelLayouts[QImage::Format_ARGB32].convertToRGBA64PM = convertARGB32ToRGBA64PM_neon;
6627 qPixelLayouts[QImage::Format_RGBA8888].fetchToARGB32PM = fetchRGBA8888ToARGB32PM_neon;
6628 qPixelLayouts[QImage::Format_RGBA8888].convertToARGB32PM = convertRGBA8888ToARGB32PM_neon;
6629 qPixelLayouts[QImage::Format_RGBA8888].storeFromARGB32PM = storeRGBA8888FromARGB32PM_neon;
6630 qPixelLayouts[QImage::Format_RGBA8888].fetchToRGBA64PM = fetchRGBA8888ToRGBA64PM_neon;
6631 qPixelLayouts[QImage::Format_RGBA8888].convertToRGBA64PM = convertRGBA8888ToRGBA64PM_neon;
6632 qPixelLayouts[QImage::Format_RGBX8888].storeFromARGB32PM = storeRGBXFromARGB32PM_neon;
6633 qPixelLayouts[QImage::Format_RGBX8888].fetchToRGBA64PM = fetchRGBA8888ToRGBA64PM_neon;
6634 qPixelLayouts[QImage::Format_RGBX8888].convertToRGBA64PM = convertRGBA8888ToRGBA64PM_neon;
6635#endif
6636
6637#if defined(ENABLE_PIXMAN_DRAWHELPERS)
6638 // The RGB16 helpers are using Arm32 assemblythat has not been ported to AArch64
6639 qBlendFunctions[QImage::Format_RGB16][QImage::Format_ARGB32_Premultiplied] = qt_blend_argb32_on_rgb16_neon;
6640 qBlendFunctions[QImage::Format_ARGB32_Premultiplied][QImage::Format_RGB16] = qt_blend_rgb16_on_argb32_neon;
6641 qBlendFunctions[QImage::Format_RGB16][QImage::Format_RGB16] = qt_blend_rgb16_on_rgb16_neon;
6642
6643 qScaleFunctions[QImage::Format_RGB16][QImage::Format_ARGB32_Premultiplied] = qt_scale_image_argb32_on_rgb16_neon;
6644 qScaleFunctions[QImage::Format_RGB16][QImage::Format_RGB16] = qt_scale_image_rgb16_on_rgb16_neon;
6645
6646 qTransformFunctions[QImage::Format_RGB16][QImage::Format_ARGB32_Premultiplied] = qt_transform_image_argb32_on_rgb16_neon;
6647 qTransformFunctions[QImage::Format_RGB16][QImage::Format_RGB16] = qt_transform_image_rgb16_on_rgb16_neon;
6648
6649 qDrawHelper[QImage::Format_RGB16].alphamapBlit = qt_alphamapblit_quint16_neon;
6650
6651 destFetchProc[QImage::Format_RGB16] = qt_destFetchRGB16_neon;
6652 destStoreProc[QImage::Format_RGB16] = qt_destStoreRGB16_neon;
6653
6654 qMemRotateFunctions[QPixelLayout::BPP16][0] = qt_memrotate90_16_neon;
6655 qMemRotateFunctions[QPixelLayout::BPP16][2] = qt_memrotate270_16_neon;
6656#endif
6657#endif // defined(__ARM_NEON__)
6658
6659#if defined(__MIPS_DSP__)
6660 // Composition functions are all DSP r1
6661 qt_functionForMode_C[QPainter::CompositionMode_SourceOver] = comp_func_SourceOver_asm_mips_dsp;
6662 qt_functionForMode_C[QPainter::CompositionMode_Source] = comp_func_Source_mips_dsp;
6663 qt_functionForMode_C[QPainter::CompositionMode_DestinationOver] = comp_func_DestinationOver_mips_dsp;
6664 qt_functionForMode_C[QPainter::CompositionMode_SourceIn] = comp_func_SourceIn_mips_dsp;
6665 qt_functionForMode_C[QPainter::CompositionMode_DestinationIn] = comp_func_DestinationIn_mips_dsp;
6666 qt_functionForMode_C[QPainter::CompositionMode_DestinationOut] = comp_func_DestinationOut_mips_dsp;
6667 qt_functionForMode_C[QPainter::CompositionMode_SourceAtop] = comp_func_SourceAtop_mips_dsp;
6668 qt_functionForMode_C[QPainter::CompositionMode_DestinationAtop] = comp_func_DestinationAtop_mips_dsp;
6669 qt_functionForMode_C[QPainter::CompositionMode_Xor] = comp_func_XOR_mips_dsp;
6670 qt_functionForMode_C[QPainter::CompositionMode_SourceOut] = comp_func_SourceOut_mips_dsp;
6671
6672 qt_functionForModeSolid_C[QPainter::CompositionMode_SourceOver] = comp_func_solid_SourceOver_mips_dsp;
6673 qt_functionForModeSolid_C[QPainter::CompositionMode_DestinationOver] = comp_func_solid_DestinationOver_mips_dsp;
6674 qt_functionForModeSolid_C[QPainter::CompositionMode_SourceIn] = comp_func_solid_SourceIn_mips_dsp;
6675 qt_functionForModeSolid_C[QPainter::CompositionMode_DestinationIn] = comp_func_solid_DestinationIn_mips_dsp;
6676 qt_functionForModeSolid_C[QPainter::CompositionMode_SourceAtop] = comp_func_solid_SourceAtop_mips_dsp;
6677 qt_functionForModeSolid_C[QPainter::CompositionMode_DestinationAtop] = comp_func_solid_DestinationAtop_mips_dsp;
6678 qt_functionForModeSolid_C[QPainter::CompositionMode_Xor] = comp_func_solid_XOR_mips_dsp;
6679 qt_functionForModeSolid_C[QPainter::CompositionMode_SourceOut] = comp_func_solid_SourceOut_mips_dsp;
6680
6681 qBlendFunctions[QImage::Format_RGB32][QImage::Format_RGB32] = qt_blend_rgb32_on_rgb32_mips_dsp;
6682 qBlendFunctions[QImage::Format_ARGB32_Premultiplied][QImage::Format_RGB32] = qt_blend_rgb32_on_rgb32_mips_dsp;
6683 qBlendFunctions[QImage::Format_RGB32][QImage::Format_ARGB32_Premultiplied] = qt_blend_argb32_on_argb32_mips_dsp;
6684 qBlendFunctions[QImage::Format_ARGB32_Premultiplied][QImage::Format_ARGB32_Premultiplied] = qt_blend_argb32_on_argb32_mips_dsp;
6685
6686 destFetchProc[QImage::Format_ARGB32] = qt_destFetchARGB32_mips_dsp;
6687
6688 destStoreProc[QImage::Format_ARGB32] = qt_destStoreARGB32_mips_dsp;
6689
6690 sourceFetchUntransformed[QImage::Format_RGB888] = qt_fetchUntransformed_888_mips_dsp;
6691 sourceFetchUntransformed[QImage::Format_RGB444] = qt_fetchUntransformed_444_mips_dsp;
6692 sourceFetchUntransformed[QImage::Format_ARGB8565_Premultiplied] = qt_fetchUntransformed_argb8565_premultiplied_mips_dsp;
6693
6694#if defined(__MIPS_DSPR2__)
6695 qBlendFunctions[QImage::Format_RGB16][QImage::Format_RGB16] = qt_blend_rgb16_on_rgb16_mips_dspr2;
6696 sourceFetchUntransformed[QImage::Format_RGB16] = qt_fetchUntransformedRGB16_mips_dspr2;
6697#else
6698 qBlendFunctions[QImage::Format_RGB16][QImage::Format_RGB16] = qt_blend_rgb16_on_rgb16_mips_dsp;
6699#endif // defined(__MIPS_DSPR2__)
6700#endif // defined(__MIPS_DSP__)
6701}
6702
6703// Ensure initialization if this object file is linked.
6704Q_CONSTRUCTOR_FUNCTION(qInitDrawhelperFunctions);
6705
6706QT_END_NAMESPACE
6707

// source code of qtbase/src/gui/painting/qdrawhelper.cpp