1/****************************************************************************
2**
3** Copyright (C) 2018 The Qt Company Ltd.
4** Contact: https://www.qt.io/licensing/
5**
6** This file is part of the QtGui module of the Qt Toolkit.
7**
8** $QT_BEGIN_LICENSE:LGPL$
9** Commercial License Usage
10** Licensees holding valid commercial Qt licenses may use this file in
11** accordance with the commercial license agreement provided with the
12** Software or, alternatively, in accordance with the terms contained in
13** a written agreement between you and The Qt Company. For licensing terms
14** and conditions see https://www.qt.io/terms-conditions. For further
15** information use the contact form at https://www.qt.io/contact-us.
16**
17** GNU Lesser General Public License Usage
18** Alternatively, this file may be used under the terms of the GNU Lesser
19** General Public License version 3 as published by the Free Software
20** Foundation and appearing in the file LICENSE.LGPL3 included in the
21** packaging of this file. Please review the following information to
22** ensure the GNU Lesser General Public License version 3 requirements
23** will be met: https://www.gnu.org/licenses/lgpl-3.0.html.
24**
25** GNU General Public License Usage
26** Alternatively, this file may be used under the terms of the GNU
27** General Public License version 2.0 or (at your option) the GNU General
28** Public license version 3 or any later version approved by the KDE Free
29** Qt Foundation. The licenses are as published by the Free Software
30** Foundation and appearing in the file LICENSE.GPL2 and LICENSE.GPL3
31** included in the packaging of this file. Please review the following
32** information to ensure the GNU General Public License requirements will
33** be met: https://www.gnu.org/licenses/gpl-2.0.html and
34** https://www.gnu.org/licenses/gpl-3.0.html.
35**
36** $QT_END_LICENSE$
37**
38****************************************************************************/
39
40
41#include "qcolortransform.h"
42#include "qcolortransform_p.h"
43
44#include "qcolormatrix_p.h"
45#include "qcolorspace_p.h"
46#include "qcolortrc_p.h"
47#include "qcolortrclut_p.h"
48
49#include <QtCore/qatomic.h>
50#include <QtCore/qmath.h>
51#include <QtGui/qcolor.h>
52#include <QtGui/qtransform.h>
53#include <QtCore/private/qsimd_p.h>
54
55#include <qdebug.h>
56
57QT_BEGIN_NAMESPACE
58
59QColorTrcLut *lutFromTrc(const QColorTrc &trc)
60{
61 if (trc.m_type == QColorTrc::Type::Table)
62 return QColorTrcLut::fromTransferTable(trc.m_table);
63 if (trc.m_type == QColorTrc::Type::Function)
64 return QColorTrcLut::fromTransferFunction(trc.m_fun);
65 qWarning() << "TRC uninitialized";
66 return nullptr;
67}
68
69void QColorTransformPrivate::updateLutsIn() const
70{
71 if (colorSpaceIn->lut.generated.loadAcquire())
72 return;
73 QMutexLocker lock(&QColorSpacePrivate::s_lutWriteLock);
74 if (colorSpaceIn->lut.generated.loadRelaxed())
75 return;
76
77 for (int i = 0; i < 3; ++i) {
78 if (!colorSpaceIn->trc[i].isValid())
79 return;
80 }
81
82 if (colorSpaceIn->trc[0] == colorSpaceIn->trc[1] && colorSpaceIn->trc[0] == colorSpaceIn->trc[2]) {
83 colorSpaceIn->lut[0].reset(lutFromTrc(colorSpaceIn->trc[0]));
84 colorSpaceIn->lut[1] = colorSpaceIn->lut[0];
85 colorSpaceIn->lut[2] = colorSpaceIn->lut[0];
86 } else {
87 for (int i = 0; i < 3; ++i)
88 colorSpaceIn->lut[i].reset(lutFromTrc(colorSpaceIn->trc[i]));
89 }
90
91 colorSpaceIn->lut.generated.storeRelease(1);
92}
93
94void QColorTransformPrivate::updateLutsOut() const
95{
96 if (colorSpaceOut->lut.generated.loadAcquire())
97 return;
98 QMutexLocker lock(&QColorSpacePrivate::s_lutWriteLock);
99 if (colorSpaceOut->lut.generated.loadRelaxed())
100 return;
101 for (int i = 0; i < 3; ++i) {
102 if (!colorSpaceOut->trc[i].isValid())
103 return;
104 }
105
106 if (colorSpaceOut->trc[0] == colorSpaceOut->trc[1] && colorSpaceOut->trc[0] == colorSpaceOut->trc[2]) {
107 colorSpaceOut->lut[0].reset(lutFromTrc(colorSpaceOut->trc[0]));
108 colorSpaceOut->lut[1] = colorSpaceOut->lut[0];
109 colorSpaceOut->lut[2] = colorSpaceOut->lut[0];
110 } else {
111 for (int i = 0; i < 3; ++i)
112 colorSpaceOut->lut[i].reset(lutFromTrc(colorSpaceOut->trc[i]));
113 }
114
115 colorSpaceOut->lut.generated.storeRelease(1);
116}
117
118/*!
119 \class QColorTransform
120 \brief The QColorTransform class is a transformation between color spaces.
121 \since 5.14
122
123 \ingroup painting
124 \ingroup appearance
125 \inmodule QtGui
126
127 QColorTransform is an instantiation of a transformation between color spaces.
    It can be applied to colors and pixels to convert them from one color space to
129 another.
130
131 Setting up a QColorTransform takes some preprocessing, so keeping around
132 QColorTransforms that you need often is recommended, instead of generating
133 them on the fly.
134*/
135
136
137QColorTransform::QColorTransform(const QColorTransform &colorTransform) noexcept
138 : d(colorTransform.d)
139{
140 if (d)
141 d->ref.ref();
142}
143
144
145QColorTransform::~QColorTransform()
146{
147 if (d && !d->ref.deref())
148 delete d;
149}
150
151/*!
152 Applies the color transformation on the QRgb value \a argb.
153
154 The input should be opaque or unpremultiplied.
155*/
156QRgb QColorTransform::map(QRgb argb) const
157{
158 if (!d)
159 return argb;
160 constexpr float f = 1.0f / 255.0f;
161 QColorVector c = { qRed(argb) * f, qGreen(argb) * f, qBlue(argb) * f };
162 c.x = d->colorSpaceIn->trc[0].apply(c.x);
163 c.y = d->colorSpaceIn->trc[1].apply(c.y);
164 c.z = d->colorSpaceIn->trc[2].apply(c.z);
165 c = d->colorMatrix.map(c);
166 c.x = std::max(0.0f, std::min(1.0f, c.x));
167 c.y = std::max(0.0f, std::min(1.0f, c.y));
168 c.z = std::max(0.0f, std::min(1.0f, c.z));
169 if (d->colorSpaceOut->lut.generated.loadAcquire()) {
170 c.x = d->colorSpaceOut->lut[0]->fromLinear(c.x);
171 c.y = d->colorSpaceOut->lut[1]->fromLinear(c.y);
172 c.z = d->colorSpaceOut->lut[2]->fromLinear(c.z);
173 } else {
174 c.x = d->colorSpaceOut->trc[0].applyInverse(c.x);
175 c.y = d->colorSpaceOut->trc[1].applyInverse(c.y);
176 c.z = d->colorSpaceOut->trc[2].applyInverse(c.z);
177 }
178
179 return qRgba(c.x * 255 + 0.5f, c.y * 255 + 0.5f, c.z * 255 + 0.5f, qAlpha(argb));
180}
181
182/*!
183 Applies the color transformation on the QRgba64 value \a rgba64.
184
185 The input should be opaque or unpremultiplied.
186*/
187QRgba64 QColorTransform::map(QRgba64 rgba64) const
188{
189 if (!d)
190 return rgba64;
191 constexpr float f = 1.0f / 65535.0f;
192 QColorVector c = { rgba64.red() * f, rgba64.green() * f, rgba64.blue() * f };
193 c.x = d->colorSpaceIn->trc[0].apply(c.x);
194 c.y = d->colorSpaceIn->trc[1].apply(c.y);
195 c.z = d->colorSpaceIn->trc[2].apply(c.z);
196 c = d->colorMatrix.map(c);
197 c.x = std::max(0.0f, std::min(1.0f, c.x));
198 c.y = std::max(0.0f, std::min(1.0f, c.y));
199 c.z = std::max(0.0f, std::min(1.0f, c.z));
200 if (d->colorSpaceOut->lut.generated.loadAcquire()) {
201 c.x = d->colorSpaceOut->lut[0]->fromLinear(c.x);
202 c.y = d->colorSpaceOut->lut[1]->fromLinear(c.y);
203 c.z = d->colorSpaceOut->lut[2]->fromLinear(c.z);
204 } else {
205 c.x = d->colorSpaceOut->trc[0].applyInverse(c.x);
206 c.y = d->colorSpaceOut->trc[1].applyInverse(c.y);
207 c.z = d->colorSpaceOut->trc[2].applyInverse(c.z);
208 }
209
210 return QRgba64::fromRgba64(c.x * 65535, c.y * 65535, c.z * 65535, rgba64.alpha());
211}
212
213/*!
214 Applies the color transformation on the QColor value \a color.
215
216*/
217QColor QColorTransform::map(const QColor &color) const
218{
219 if (!d)
220 return color;
221 QColor clr = color;
222 if (color.spec() != QColor::ExtendedRgb || color.spec() != QColor::Rgb)
223 clr = clr.toRgb();
224
225 QColorVector c = { (float)clr.redF(), (float)clr.greenF(), (float)clr.blueF() };
226 if (clr.spec() == QColor::ExtendedRgb) {
227 c.x = d->colorSpaceIn->trc[0].applyExtended(c.x);
228 c.y = d->colorSpaceIn->trc[1].applyExtended(c.y);
229 c.z = d->colorSpaceIn->trc[2].applyExtended(c.z);
230 } else {
231 c.x = d->colorSpaceIn->trc[0].apply(c.x);
232 c.y = d->colorSpaceIn->trc[1].apply(c.y);
233 c.z = d->colorSpaceIn->trc[2].apply(c.z);
234 }
235 c = d->colorMatrix.map(c);
236 bool inGamut = c.x >= 0.0f && c.x <= 1.0f && c.y >= 0.0f && c.y <= 1.0f && c.z >= 0.0f && c.z <= 1.0f;
237 if (inGamut) {
238 if (d->colorSpaceOut->lut.generated.loadAcquire()) {
239 c.x = d->colorSpaceOut->lut[0]->fromLinear(c.x);
240 c.y = d->colorSpaceOut->lut[1]->fromLinear(c.y);
241 c.z = d->colorSpaceOut->lut[2]->fromLinear(c.z);
242 } else {
243 c.x = d->colorSpaceOut->trc[0].applyInverse(c.x);
244 c.y = d->colorSpaceOut->trc[1].applyInverse(c.y);
245 c.z = d->colorSpaceOut->trc[2].applyInverse(c.z);
246 }
247 } else {
248 c.x = d->colorSpaceOut->trc[0].applyInverseExtended(c.x);
249 c.y = d->colorSpaceOut->trc[1].applyInverseExtended(c.y);
250 c.z = d->colorSpaceOut->trc[2].applyInverseExtended(c.z);
251 }
252 QColor out;
253 out.setRgbF(c.x, c.y, c.z, color.alphaF());
254 return out;
255}
256
257// Optimized sub-routines for fast block based conversion:
258
259static void applyMatrix(QColorVector *buffer, const qsizetype len, const QColorMatrix &colorMatrix)
260{
261#if defined(__SSE2__)
262 const __m128 minV = _mm_set1_ps(0.0f);
263 const __m128 maxV = _mm_set1_ps(1.0f);
264 const __m128 xMat = _mm_loadu_ps(&colorMatrix.r.x);
265 const __m128 yMat = _mm_loadu_ps(&colorMatrix.g.x);
266 const __m128 zMat = _mm_loadu_ps(&colorMatrix.b.x);
267 for (qsizetype j = 0; j < len; ++j) {
268 __m128 c = _mm_loadu_ps(&buffer[j].x);
269 __m128 cx = _mm_shuffle_ps(c, c, _MM_SHUFFLE(0, 0, 0, 0));
270 __m128 cy = _mm_shuffle_ps(c, c, _MM_SHUFFLE(1, 1, 1, 1));
271 __m128 cz = _mm_shuffle_ps(c, c, _MM_SHUFFLE(2, 2, 2, 2));
272 cx = _mm_mul_ps(cx, xMat);
273 cy = _mm_mul_ps(cy, yMat);
274 cz = _mm_mul_ps(cz, zMat);
275 cx = _mm_add_ps(cx, cy);
276 cx = _mm_add_ps(cx, cz);
277 // Clamp:
278 cx = _mm_min_ps(cx, maxV);
279 cx = _mm_max_ps(cx, minV);
280 _mm_storeu_ps(&buffer[j].x, cx);
281 }
282#else
283 for (int j = 0; j < len; ++j) {
284 const QColorVector cv = colorMatrix.map(buffer[j]);
285 buffer[j].x = std::max(0.0f, std::min(1.0f, cv.x));
286 buffer[j].y = std::max(0.0f, std::min(1.0f, cv.y));
287 buffer[j].z = std::max(0.0f, std::min(1.0f, cv.z));
288 }
289#endif
290}
291
292template<typename T>
293static void loadPremultiplied(QColorVector *buffer, const T *src, const qsizetype len, const QColorTransformPrivate *d_ptr);
294template<typename T>
295static void loadUnpremultiplied(QColorVector *buffer, const T *src, const qsizetype len, const QColorTransformPrivate *d_ptr);
296
297#if defined(__SSE2__)
298// Load to [0-alpha] in 4x32 SIMD
299template<typename T>
300static inline void loadP(const T &p, __m128i &v);
301
302template<>
303inline void loadP<QRgb>(const QRgb &p, __m128i &v)
304{
305 v = _mm_cvtsi32_si128(p);
306#if defined(__SSE4_1__)
307 v = _mm_cvtepu8_epi32(v);
308#else
309 v = _mm_unpacklo_epi8(v, _mm_setzero_si128());
310 v = _mm_unpacklo_epi16(v, _mm_setzero_si128());
311#endif
312}
313
314template<>
315inline void loadP<QRgba64>(const QRgba64 &p, __m128i &v)
316{
317 v = _mm_loadl_epi64((const __m128i *)&p);
318#if defined(__SSE4_1__)
319 v = _mm_cvtepu16_epi32(v);
320#else
321 v = _mm_unpacklo_epi16(v, _mm_setzero_si128());
322#endif
323 // Shuffle to ARGB as the template below expects it
324 v = _mm_shuffle_epi32(v, _MM_SHUFFLE(3, 0, 1, 2));
325}
326
327template<typename T>
328static void loadPremultiplied(QColorVector *buffer, const T *src, const qsizetype len, const QColorTransformPrivate *d_ptr)
329{
330 const __m128 v4080 = _mm_set1_ps(4080.f);
331 const __m128 iFF00 = _mm_set1_ps(1.0f / (255 * 256));
332 for (qsizetype i = 0; i < len; ++i) {
333 __m128i v;
334 loadP<T>(src[i], v);
335 __m128 vf = _mm_cvtepi32_ps(v);
336 // Approximate 1/a:
337 __m128 va = _mm_shuffle_ps(vf, vf, _MM_SHUFFLE(3, 3, 3, 3));
338 __m128 via = _mm_rcp_ps(va);
339 via = _mm_sub_ps(_mm_add_ps(via, via), _mm_mul_ps(via, _mm_mul_ps(via, va)));
340 // v * (1/a)
341 vf = _mm_mul_ps(vf, via);
342
343 // Handle zero alpha
344 __m128 vAlphaMask = _mm_cmpeq_ps(va, _mm_set1_ps(0.0f));
345 vf = _mm_andnot_ps(vAlphaMask, vf);
346
347 // LUT
348 v = _mm_cvtps_epi32(_mm_mul_ps(vf, v4080));
349 const int ridx = _mm_extract_epi16(v, 4);
350 const int gidx = _mm_extract_epi16(v, 2);
351 const int bidx = _mm_extract_epi16(v, 0);
352 v = _mm_insert_epi16(v, d_ptr->colorSpaceIn->lut[0]->m_toLinear[ridx], 0);
353 v = _mm_insert_epi16(v, d_ptr->colorSpaceIn->lut[1]->m_toLinear[gidx], 2);
354 v = _mm_insert_epi16(v, d_ptr->colorSpaceIn->lut[2]->m_toLinear[bidx], 4);
355 vf = _mm_mul_ps(_mm_cvtepi32_ps(v), iFF00);
356
357 _mm_storeu_ps(&buffer[i].x, vf);
358 }
359}
360
361// Load to [0-4080] in 4x32 SIMD
362template<typename T>
363static inline void loadPU(const T &p, __m128i &v);
364
365template<>
366inline void loadPU<QRgb>(const QRgb &p, __m128i &v)
367{
368 v = _mm_cvtsi32_si128(p);
369#if defined(__SSE4_1__)
370 v = _mm_cvtepu8_epi32(v);
371#else
372 v = _mm_unpacklo_epi8(v, _mm_setzero_si128());
373 v = _mm_unpacklo_epi16(v, _mm_setzero_si128());
374#endif
375 v = _mm_slli_epi32(v, 4);
376}
377
378template<>
379inline void loadPU<QRgba64>(const QRgba64 &p, __m128i &v)
380{
381 v = _mm_loadl_epi64((const __m128i *)&p);
382 v = _mm_sub_epi16(v, _mm_srli_epi16(v, 8));
383#if defined(__SSE4_1__)
384 v = _mm_cvtepu16_epi32(v);
385#else
386 v = _mm_unpacklo_epi16(v, _mm_setzero_si128());
387#endif
388 v = _mm_srli_epi32(v, 4);
389 // Shuffle to ARGB as the template below expects it
390 v = _mm_shuffle_epi32(v, _MM_SHUFFLE(3, 0, 1, 2));
391}
392
393template<typename T>
394void loadUnpremultiplied(QColorVector *buffer, const T *src, const qsizetype len, const QColorTransformPrivate *d_ptr)
395{
396 const __m128 iFF00 = _mm_set1_ps(1.0f / (255 * 256));
397 for (qsizetype i = 0; i < len; ++i) {
398 __m128i v;
399 loadPU<T>(src[i], v);
400 const int ridx = _mm_extract_epi16(v, 4);
401 const int gidx = _mm_extract_epi16(v, 2);
402 const int bidx = _mm_extract_epi16(v, 0);
403 v = _mm_insert_epi16(v, d_ptr->colorSpaceIn->lut[0]->m_toLinear[ridx], 0);
404 v = _mm_insert_epi16(v, d_ptr->colorSpaceIn->lut[1]->m_toLinear[gidx], 2);
405 v = _mm_insert_epi16(v, d_ptr->colorSpaceIn->lut[2]->m_toLinear[bidx], 4);
406 __m128 vf = _mm_mul_ps(_mm_cvtepi32_ps(v), iFF00);
407 _mm_storeu_ps(&buffer[i].x, vf);
408 }
409}
410
411#else
412template<>
413void loadPremultiplied<QRgb>(QColorVector *buffer, const QRgb *src, const qsizetype len, const QColorTransformPrivate *d_ptr)
414{
415 for (qsizetype i = 0; i < len; ++i) {
416 const uint p = src[i];
417 const int a = qAlpha(p);
418 if (a) {
419 const float ia = 4080.0f / a;
420 const int ridx = int(qRed(p) * ia + 0.5f);
421 const int gidx = int(qGreen(p) * ia + 0.5f);
422 const int bidx = int(qBlue(p) * ia + 0.5f);
423 buffer[i].x = d_ptr->colorSpaceIn->lut[0]->m_toLinear[ridx] * (1.0f / (255 * 256));
424 buffer[i].y = d_ptr->colorSpaceIn->lut[1]->m_toLinear[gidx] * (1.0f / (255 * 256));
425 buffer[i].z = d_ptr->colorSpaceIn->lut[2]->m_toLinear[bidx] * (1.0f / (255 * 256));
426 } else {
427 buffer[i].x = buffer[i].y = buffer[i].z = 0.0f;
428 }
429 }
430}
431
432template<>
433void loadPremultiplied<QRgba64>(QColorVector *buffer, const QRgba64 *src, const qsizetype len, const QColorTransformPrivate *d_ptr)
434{
435 for (qsizetype i = 0; i < len; ++i) {
436 const QRgba64 &p = src[i];
437 const int a = p.alpha();
438 if (a) {
439 const float ia = 4080.0f / a;
440 const int ridx = int(p.red() * ia + 0.5f);
441 const int gidx = int(p.green() * ia + 0.5f);
442 const int bidx = int(p.blue() * ia + 0.5f);
443 buffer[i].x = d_ptr->colorSpaceIn->lut[0]->m_toLinear[ridx] * (1.0f / (255 * 256));
444 buffer[i].y = d_ptr->colorSpaceIn->lut[1]->m_toLinear[gidx] * (1.0f / (255 * 256));
445 buffer[i].z = d_ptr->colorSpaceIn->lut[2]->m_toLinear[bidx] * (1.0f / (255 * 256));
446 } else {
447 buffer[i].x = buffer[i].y = buffer[i].z = 0.0f;
448 }
449 }
450}
451
452template<>
453void loadUnpremultiplied<QRgb>(QColorVector *buffer, const QRgb *src, const qsizetype len, const QColorTransformPrivate *d_ptr)
454{
455 for (qsizetype i = 0; i < len; ++i) {
456 const uint p = src[i];
457 buffer[i].x = d_ptr->colorSpaceIn->lut[0]->u8ToLinearF32(qRed(p));
458 buffer[i].y = d_ptr->colorSpaceIn->lut[1]->u8ToLinearF32(qGreen(p));
459 buffer[i].z = d_ptr->colorSpaceIn->lut[2]->u8ToLinearF32(qBlue(p));
460 }
461}
462
463template<>
464void loadUnpremultiplied<QRgba64>(QColorVector *buffer, const QRgba64 *src, const qsizetype len, const QColorTransformPrivate *d_ptr)
465{
466 for (qsizetype i = 0; i < len; ++i) {
467 const QRgba64 &p = src[i];
468 buffer[i].x = d_ptr->colorSpaceIn->lut[0]->u16ToLinearF32(p.red());
469 buffer[i].y = d_ptr->colorSpaceIn->lut[1]->u16ToLinearF32(p.green());
470 buffer[i].z = d_ptr->colorSpaceIn->lut[2]->u16ToLinearF32(p.blue());
471 }
472}
473#endif
474
475static void storePremultiplied(QRgb *dst, const QRgb *src, const QColorVector *buffer, const qsizetype len,
476 const QColorTransformPrivate *d_ptr)
477{
478#if defined(__SSE2__)
479 const __m128 v4080 = _mm_set1_ps(4080.f);
480 const __m128 iFF00 = _mm_set1_ps(1.0f / (255 * 256));
481 for (qsizetype i = 0; i < len; ++i) {
482 const int a = qAlpha(src[i]);
483 __m128 vf = _mm_loadu_ps(&buffer[i].x);
484 __m128i v = _mm_cvtps_epi32(_mm_mul_ps(vf, v4080));
485 __m128 va = _mm_set1_ps(a);
486 va = _mm_mul_ps(va, iFF00);
487 const int ridx = _mm_extract_epi16(v, 0);
488 const int gidx = _mm_extract_epi16(v, 2);
489 const int bidx = _mm_extract_epi16(v, 4);
490 v = _mm_insert_epi16(v, d_ptr->colorSpaceOut->lut[0]->m_fromLinear[ridx], 4);
491 v = _mm_insert_epi16(v, d_ptr->colorSpaceOut->lut[1]->m_fromLinear[gidx], 2);
492 v = _mm_insert_epi16(v, d_ptr->colorSpaceOut->lut[2]->m_fromLinear[bidx], 0);
493 vf = _mm_cvtepi32_ps(v);
494 vf = _mm_mul_ps(vf, va);
495 v = _mm_cvtps_epi32(vf);
496 v = _mm_packs_epi32(v, v);
497 v = _mm_insert_epi16(v, a, 3);
498 v = _mm_packus_epi16(v, v);
499 dst[i] = _mm_cvtsi128_si32(v);
500 }
501#else
502 for (qsizetype i = 0; i < len; ++i) {
503 const int a = qAlpha(src[i]);
504 const float fa = a / (255.0f * 256.0f);
505 const float r = d_ptr->colorSpaceOut->lut[0]->m_fromLinear[int(buffer[i].x * 4080.0f + 0.5f)];
506 const float g = d_ptr->colorSpaceOut->lut[1]->m_fromLinear[int(buffer[i].y * 4080.0f + 0.5f)];
507 const float b = d_ptr->colorSpaceOut->lut[2]->m_fromLinear[int(buffer[i].z * 4080.0f + 0.5f)];
508 dst[i] = qRgba(r * fa + 0.5f, g * fa + 0.5f, b * fa + 0.5f, a);
509 }
510#endif
511}
512
513static void storeUnpremultiplied(QRgb *dst, const QRgb *src, const QColorVector *buffer, const qsizetype len,
514 const QColorTransformPrivate *d_ptr)
515{
516#if defined(__SSE2__)
517 const __m128 v4080 = _mm_set1_ps(4080.f);
518 for (qsizetype i = 0; i < len; ++i) {
519 const int a = qAlpha(src[i]);
520 __m128 vf = _mm_loadu_ps(&buffer[i].x);
521 __m128i v = _mm_cvtps_epi32(_mm_mul_ps(vf, v4080));
522 const int ridx = _mm_extract_epi16(v, 0);
523 const int gidx = _mm_extract_epi16(v, 2);
524 const int bidx = _mm_extract_epi16(v, 4);
525 v = _mm_setzero_si128();
526 v = _mm_insert_epi16(v, d_ptr->colorSpaceOut->lut[0]->m_fromLinear[ridx], 2);
527 v = _mm_insert_epi16(v, d_ptr->colorSpaceOut->lut[1]->m_fromLinear[gidx], 1);
528 v = _mm_insert_epi16(v, d_ptr->colorSpaceOut->lut[2]->m_fromLinear[bidx], 0);
529 v = _mm_add_epi16(v, _mm_set1_epi16(0x80));
530 v = _mm_srli_epi16(v, 8);
531 v = _mm_insert_epi16(v, a, 3);
532 v = _mm_packus_epi16(v, v);
533 dst[i] = _mm_cvtsi128_si32(v);
534 }
535#else
536 for (qsizetype i = 0; i < len; ++i) {
537 const int r = d_ptr->colorSpaceOut->lut[0]->u8FromLinearF32(buffer[i].x);
538 const int g = d_ptr->colorSpaceOut->lut[1]->u8FromLinearF32(buffer[i].y);
539 const int b = d_ptr->colorSpaceOut->lut[2]->u8FromLinearF32(buffer[i].z);
540 dst[i] = (src[i] & 0xff000000) | (r << 16) | (g << 8) | (b << 0);
541 }
542#endif
543}
544
545static void storeOpaque(QRgb *dst, const QRgb *src, const QColorVector *buffer, const qsizetype len,
546 const QColorTransformPrivate *d_ptr)
547{
548 Q_UNUSED(src);
549#if defined(__SSE2__)
550 const __m128 v4080 = _mm_set1_ps(4080.f);
551 for (qsizetype i = 0; i < len; ++i) {
552 __m128 vf = _mm_loadu_ps(&buffer[i].x);
553 __m128i v = _mm_cvtps_epi32(_mm_mul_ps(vf, v4080));
554 const int ridx = _mm_extract_epi16(v, 0);
555 const int gidx = _mm_extract_epi16(v, 2);
556 const int bidx = _mm_extract_epi16(v, 4);
557 v = _mm_setzero_si128();
558 v = _mm_insert_epi16(v, d_ptr->colorSpaceOut->lut[0]->m_fromLinear[ridx], 2);
559 v = _mm_insert_epi16(v, d_ptr->colorSpaceOut->lut[1]->m_fromLinear[gidx], 1);
560 v = _mm_insert_epi16(v, d_ptr->colorSpaceOut->lut[2]->m_fromLinear[bidx], 0);
561 v = _mm_add_epi16(v, _mm_set1_epi16(0x80));
562 v = _mm_srli_epi16(v, 8);
563 v = _mm_insert_epi16(v, 255, 3);
564 v = _mm_packus_epi16(v, v);
565 dst[i] = _mm_cvtsi128_si32(v);
566 }
567#else
568 for (qsizetype i = 0; i < len; ++i) {
569 const int r = d_ptr->colorSpaceOut->lut[0]->u8FromLinearF32(buffer[i].x);
570 const int g = d_ptr->colorSpaceOut->lut[1]->u8FromLinearF32(buffer[i].y);
571 const int b = d_ptr->colorSpaceOut->lut[2]->u8FromLinearF32(buffer[i].z);
572 dst[i] = 0xff000000 | (r << 16) | (g << 8) | (b << 0);
573 }
574#endif
575}
576
577static void storePremultiplied(QRgba64 *dst, const QRgba64 *src, const QColorVector *buffer, const qsizetype len,
578 const QColorTransformPrivate *d_ptr)
579{
580 for (qsizetype i = 0; i < len; ++i) {
581 const int a = src[i].alpha();
582 const float fa = a / (255.0f * 256.0f);
583 const float r = d_ptr->colorSpaceOut->lut[0]->m_fromLinear[int(buffer[i].x * 4080.0f + 0.5f)];
584 const float g = d_ptr->colorSpaceOut->lut[1]->m_fromLinear[int(buffer[i].y * 4080.0f + 0.5f)];
585 const float b = d_ptr->colorSpaceOut->lut[2]->m_fromLinear[int(buffer[i].z * 4080.0f + 0.5f)];
586 dst[i] = qRgba64(r * fa + 0.5f, g * fa + 0.5f, b * fa + 0.5f, a);
587 }
588}
589
590static void storeUnpremultiplied(QRgba64 *dst, const QRgba64 *src, const QColorVector *buffer, const qsizetype len,
591 const QColorTransformPrivate *d_ptr)
592{
593 for (qsizetype i = 0; i < len; ++i) {
594 const int r = d_ptr->colorSpaceOut->lut[0]->u16FromLinearF32(buffer[i].x);
595 const int g = d_ptr->colorSpaceOut->lut[1]->u16FromLinearF32(buffer[i].y);
596 const int b = d_ptr->colorSpaceOut->lut[2]->u16FromLinearF32(buffer[i].z);
597 dst[i] = qRgba64(r, g, b, src[i].alpha());
598 }
599}
600
601static void storeOpaque(QRgba64 *dst, const QRgba64 *src, const QColorVector *buffer, const qsizetype len,
602 const QColorTransformPrivate *d_ptr)
603{
604 Q_UNUSED(src);
605 for (qsizetype i = 0; i < len; ++i) {
606 const int r = d_ptr->colorSpaceOut->lut[0]->u16FromLinearF32(buffer[i].x);
607 const int g = d_ptr->colorSpaceOut->lut[1]->u16FromLinearF32(buffer[i].y);
608 const int b = d_ptr->colorSpaceOut->lut[2]->u16FromLinearF32(buffer[i].z);
609 dst[i] = qRgba64(r, g, b, 0xFFFF);
610 }
611}
612
613static constexpr qsizetype WorkBlockSize = 256;
614
615template<typename T>
616void QColorTransformPrivate::apply(T *dst, const T *src, qsizetype count, TransformFlags flags) const
617{
618 if (!colorMatrix.isValid())
619 return;
620
621 updateLutsIn();
622 updateLutsOut();
623
624 bool doApplyMatrix = (colorMatrix != QColorMatrix::identity());
625
626 QColorVector buffer[WorkBlockSize];
627 qsizetype i = 0;
628 while (i < count) {
629 const qsizetype len = qMin(count - i, WorkBlockSize);
630 if (flags & InputPremultiplied)
631 loadPremultiplied(buffer, src + i, len, this);
632 else
633 loadUnpremultiplied(buffer, src + i, len, this);
634
635 if (doApplyMatrix)
636 applyMatrix(buffer, len, colorMatrix);
637
638 if (flags & InputOpaque)
639 storeOpaque(dst + i, src + i, buffer, len, this);
640 else if (flags & OutputPremultiplied)
641 storePremultiplied(dst + i, src + i, buffer, len, this);
642 else
643 storeUnpremultiplied(dst + i, src + i, buffer, len, this);
644
645 i += len;
646 }
647}
648
649/*!
650 \internal
651 \enum QColorTransformPrivate::TransformFlag
652
653 Defines how the transform is to be applied.
654
655 \value Unpremultiplied The input and output should both be unpremultiplied.
656 \value InputOpaque The input is guaranteed to be opaque.
657 \value InputPremultiplied The input is premultiplied.
658 \value OutputPremultiplied The output should be premultiplied.
    \value Premultiplied Both input and output should be premultiplied.
660*/
661
662/*!
663 \internal
664 Prepares a color transformation for fast application. You do not need to
665 call this explicitly as it will be called implicitly on the first transforms, but
666 if you want predictable performance on the first transforms, you can perform it
667 in advance.
668
669 \sa QColorTransform::map(), apply()
670*/
671void QColorTransformPrivate::prepare()
672{
673 updateLutsIn();
674 updateLutsOut();
675}
676
677/*!
678 \internal
679 Applies the color transformation on \a count QRgb pixels starting from
680 \a src and stores the result in \a dst.
681
682 Thread-safe if prepare() has been called first.
683
684 Assumes unpremultiplied data by default. Set \a flags to change defaults.
685
686 \sa prepare()
687*/
688void QColorTransformPrivate::apply(QRgb *dst, const QRgb *src, qsizetype count, TransformFlags flags) const
689{
690 apply<QRgb>(dst, src, count, flags);
691}
692
693/*!
694 \internal
695 Applies the color transformation on \a count QRgba64 pixels starting from
696 \a src and stores the result in \a dst.
697
698 Thread-safe if prepare() has been called first.
699
700 Assumes unpremultiplied data by default. Set \a flags to change defaults.
701
702 \sa prepare()
703*/
704void QColorTransformPrivate::apply(QRgba64 *dst, const QRgba64 *src, qsizetype count, TransformFlags flags) const
705{
706 apply<QRgba64>(dst, src, count, flags);
707}
708
709
710QT_END_NAMESPACE
711