1// Copyright (C) 2016 The Qt Company Ltd.
2// SPDX-License-Identifier: LicenseRef-Qt-Commercial OR LGPL-3.0-only OR GPL-2.0-only OR GPL-3.0-only
3
4#ifndef QCOLORTRCLUT_P_H
5#define QCOLORTRCLUT_P_H
6
7//
8// W A R N I N G
9// -------------
10//
11// This file is not part of the Qt API. It exists purely as an
12// implementation detail. This header file may change from version to
13// version without notice, or even be removed.
14//
15// We mean it.
16//
17
18#include <QtGui/private/qtguiglobal_p.h>
19#include <QtGui/qrgb.h>
20#include <QtGui/qrgba64.h>
21
22#include <cmath>
23#include <memory>
24
25#if defined(__SSE2__)
26#include <emmintrin.h>
27#elif defined(__ARM_NEON__) || defined(__ARM_NEON)
28#include <arm_neon.h>
29#endif
30
31QT_BEGIN_NAMESPACE
32
33class QColorTransferFunction;
34class QColorTransferTable;
35
36class Q_GUI_EXPORT QColorTrcLut
37{
38public:
39 static std::shared_ptr<QColorTrcLut> fromGamma(qreal gamma);
40 static std::shared_ptr<QColorTrcLut> fromTransferFunction(const QColorTransferFunction &transfn);
41 static std::shared_ptr<QColorTrcLut> fromTransferTable(const QColorTransferTable &transTable);
42
43 // The following methods all convert opaque or unpremultiplied colors:
44
45 QRgba64 toLinear64(QRgb rgb32) const
46 {
47#if defined(__SSE2__)
48 __m128i v = _mm_cvtsi32_si128(a: rgb32);
49 v = _mm_unpacklo_epi8(a: v, b: _mm_setzero_si128());
50 const __m128i vidx = _mm_slli_epi16(a: v, count: 4);
51 const int ridx = _mm_extract_epi16(vidx, 2);
52 const int gidx = _mm_extract_epi16(vidx, 1);
53 const int bidx = _mm_extract_epi16(vidx, 0);
54 v = _mm_slli_epi16(a: v, count: 8); // a * 256
55 v = _mm_insert_epi16(v, m_toLinear[ridx], 0);
56 v = _mm_insert_epi16(v, m_toLinear[gidx], 1);
57 v = _mm_insert_epi16(v, m_toLinear[bidx], 2);
58 v = _mm_add_epi16(a: v, b: _mm_srli_epi16(a: v, count: 8));
59 QRgba64 rgba64;
60 _mm_storel_epi64(p: reinterpret_cast<__m128i *>(&rgba64), a: v);
61 return rgba64;
62#elif (defined(__ARM_NEON__) || defined(__ARM_NEON)) && Q_BYTE_ORDER == Q_LITTLE_ENDIAN
63 uint8x8_t v8 = vreinterpret_u8_u32(vmov_n_u32(rgb32));
64 uint16x4_t v16 = vget_low_u16(vmovl_u8(v8));
65 const uint16x4_t vidx = vshl_n_u16(v16, 4);
66 const int ridx = vget_lane_u16(vidx, 2);
67 const int gidx = vget_lane_u16(vidx, 1);
68 const int bidx = vget_lane_u16(vidx, 0);
69 v16 = vshl_n_u16(v16, 8); // a * 256
70 v16 = vset_lane_u16(m_toLinear[ridx], v16, 0);
71 v16 = vset_lane_u16(m_toLinear[gidx], v16, 1);
72 v16 = vset_lane_u16(m_toLinear[bidx], v16, 2);
73 v16 = vadd_u16(v16, vshr_n_u16(v16, 8));
74 return QRgba64::fromRgba64(vget_lane_u64(vreinterpret_u64_u16(v16), 0));
75#else
76 uint r = m_toLinear[qRed(rgb32) << 4];
77 uint g = m_toLinear[qGreen(rgb32) << 4];
78 uint b = m_toLinear[qBlue(rgb32) << 4];
79 r = r + (r >> 8);
80 g = g + (g >> 8);
81 b = b + (b >> 8);
82 return QRgba64::fromRgba64(r, g, b, qAlpha(rgb32) * 257);
83#endif
84 }
85 QRgba64 toLinear64(QRgba64) const = delete;
86
87 QRgb toLinear(QRgb rgb32) const
88 {
89 return convertWithTable(rgb32, table: m_toLinear);
90 }
91
92 QRgba64 toLinear(QRgba64 rgb64) const
93 {
94 return convertWithTable(rgb64, table: m_toLinear);
95 }
96
97 float u8ToLinearF32(int c) const
98 {
99 ushort v = m_toLinear[c << 4];
100 return v * (1.0f / (255*256));
101 }
102
103 float u16ToLinearF32(int c) const
104 {
105 c -= (c >> 8);
106 ushort v = m_toLinear[c >> 4];
107 return v * (1.0f / (255*256));
108 }
109
110 float toLinear(float f) const
111 {
112 ushort v = m_toLinear[(int)(f * (255 * 16) + 0.5f)];
113 return v * (1.0f / (255*256));
114 }
115
116 QRgb fromLinear64(QRgba64 rgb64) const
117 {
118#if defined(__SSE2__)
119 __m128i v = _mm_loadl_epi64(p: reinterpret_cast<const __m128i *>(&rgb64));
120 v = _mm_sub_epi16(a: v, b: _mm_srli_epi16(a: v, count: 8));
121 const __m128i vidx = _mm_srli_epi16(a: v, count: 4);
122 const int ridx = _mm_extract_epi16(vidx, 0);
123 const int gidx = _mm_extract_epi16(vidx, 1);
124 const int bidx = _mm_extract_epi16(vidx, 2);
125 v = _mm_insert_epi16(v, m_fromLinear[ridx], 2);
126 v = _mm_insert_epi16(v, m_fromLinear[gidx], 1);
127 v = _mm_insert_epi16(v, m_fromLinear[bidx], 0);
128 v = _mm_add_epi16(a: v, b: _mm_set1_epi16(w: 0x80));
129 v = _mm_srli_epi16(a: v, count: 8);
130 v = _mm_packus_epi16(a: v, b: v);
131 return _mm_cvtsi128_si32(a: v);
132#elif (defined(__ARM_NEON__) || defined(__ARM_NEON)) && Q_BYTE_ORDER == Q_LITTLE_ENDIAN
133 uint16x4_t v = vreinterpret_u16_u64(vmov_n_u64(rgb64));
134 v = vsub_u16(v, vshr_n_u16(v, 8));
135 const uint16x4_t vidx = vshr_n_u16(v, 4);
136 const int ridx = vget_lane_u16(vidx, 0);
137 const int gidx = vget_lane_u16(vidx, 1);
138 const int bidx = vget_lane_u16(vidx, 2);
139 v = vset_lane_u16(m_fromLinear[ridx], v, 2);
140 v = vset_lane_u16(m_fromLinear[gidx], v, 1);
141 v = vset_lane_u16(m_fromLinear[bidx], v, 0);
142 uint8x8_t v8 = vrshrn_n_u16(vcombine_u16(v, v), 8);
143 return vget_lane_u32(vreinterpret_u32_u8(v8), 0);
144#else
145 uint a = rgb64.alpha();
146 uint r = rgb64.red();
147 uint g = rgb64.green();
148 uint b = rgb64.blue();
149 a = a - (a >> 8);
150 r = r - (r >> 8);
151 g = g - (g >> 8);
152 b = b - (b >> 8);
153 a = (a + 0x80) >> 8;
154 r = (m_fromLinear[r >> 4] + 0x80) >> 8;
155 g = (m_fromLinear[g >> 4] + 0x80) >> 8;
156 b = (m_fromLinear[b >> 4] + 0x80) >> 8;
157 return (a << 24) | (r << 16) | (g << 8) | b;
158#endif
159 }
160
161 QRgb fromLinear(QRgb rgb32) const
162 {
163 return convertWithTable(rgb32, table: m_fromLinear);
164 }
165
166 QRgba64 fromLinear(QRgba64 rgb64) const
167 {
168 return convertWithTable(rgb64, table: m_fromLinear);
169 }
170
171 int u8FromLinearF32(float f) const
172 {
173 ushort v = m_fromLinear[(int)(f * (255 * 16) + 0.5f)];
174 return (v + 0x80) >> 8;
175 }
176 int u16FromLinearF32(float f) const
177 {
178 ushort v = m_fromLinear[(int)(f * (255 * 16) + 0.5f)];
179 return v + (v >> 8);
180 }
181 float fromLinear(float f) const
182 {
183 ushort v = m_fromLinear[(int)(f * (255 * 16) + 0.5f)];
184 return v * (1.0f / (255*256));
185 }
186
187 // We translate to 0-65280 (255*256) instead to 0-65535 to make simple
188 // shifting an accurate conversion.
189 // We translate from 0-4080 (255*16) for the same speed up, and to keep
190 // the tables small enough to fit in most inner caches.
191 ushort m_toLinear[(255 * 16) + 1]; // [0-4080] -> [0-65280]
192 ushort m_fromLinear[(255 * 16) + 1]; // [0-4080] -> [0-65280]
193
194private:
195 QColorTrcLut() { } // force uninitialized members
196
197 static std::shared_ptr<QColorTrcLut> create();
198
199 Q_ALWAYS_INLINE static QRgb convertWithTable(QRgb rgb32, const ushort *table)
200 {
201 const int r = (table[qRed(rgb: rgb32) << 4] + 0x80) >> 8;
202 const int g = (table[qGreen(rgb: rgb32) << 4] + 0x80) >> 8;
203 const int b = (table[qBlue(rgb: rgb32) << 4] + 0x80) >> 8;
204 return (rgb32 & 0xff000000) | (r << 16) | (g << 8) | b;
205 }
206 Q_ALWAYS_INLINE static QRgba64 convertWithTable(QRgba64 rgb64, const ushort *table)
207 {
208#if defined(__SSE2__)
209 __m128i v = _mm_loadl_epi64(p: reinterpret_cast<const __m128i *>(&rgb64));
210 v = _mm_sub_epi16(a: v, b: _mm_srli_epi16(a: v, count: 8));
211 const __m128i vidx = _mm_srli_epi16(a: v, count: 4);
212 const int ridx = _mm_extract_epi16(vidx, 2);
213 const int gidx = _mm_extract_epi16(vidx, 1);
214 const int bidx = _mm_extract_epi16(vidx, 0);
215 v = _mm_insert_epi16(v, table[ridx], 2);
216 v = _mm_insert_epi16(v, table[gidx], 1);
217 v = _mm_insert_epi16(v, table[bidx], 0);
218 v = _mm_add_epi16(a: v, b: _mm_srli_epi16(a: v, count: 8));
219 QRgba64 rgba64;
220 _mm_storel_epi64(p: reinterpret_cast<__m128i *>(&rgba64), a: v);
221 return rgba64;
222#elif (defined(__ARM_NEON__) || defined(__ARM_NEON)) && Q_BYTE_ORDER == Q_LITTLE_ENDIAN
223 uint16x4_t v = vreinterpret_u16_u64(vmov_n_u64(rgb64));
224 v = vsub_u16(v, vshr_n_u16(v, 8));
225 const uint16x4_t vidx = vshr_n_u16(v, 4);
226 const int ridx = vget_lane_u16(vidx, 2);
227 const int gidx = vget_lane_u16(vidx, 1);
228 const int bidx = vget_lane_u16(vidx, 0);
229 v = vset_lane_u16(table[ridx], v, 2);
230 v = vset_lane_u16(table[gidx], v, 1);
231 v = vset_lane_u16(table[bidx], v, 0);
232 v = vadd_u16(v, vshr_n_u16(v, 8));
233 return QRgba64::fromRgba64(vget_lane_u64(vreinterpret_u64_u16(v), 0));
234#else
235 ushort r = rgb64.red();
236 ushort g = rgb64.green();
237 ushort b = rgb64.blue();
238 r = r - (r >> 8);
239 g = g - (g >> 8);
240 b = b - (b >> 8);
241 r = table[r >> 4];
242 g = table[g >> 4];
243 b = table[b >> 4];
244 r = r + (r >> 8);
245 g = g + (g >> 8);
246 b = b + (b >> 8);
247 return QRgba64::fromRgba64(r, g, b, rgb64.alpha());
248#endif
249 }
250};
251
252QT_END_NAMESPACE
253
254#endif // QCOLORTRCLUT_P_H
255

// source code of qtbase/src/gui/painting/qcolortrclut_p.h