1/****************************************************************************
2**
3** Copyright (C) 2016 The Qt Company Ltd.
4** Contact: https://www.qt.io/licensing/
5**
6** This file is part of the QtGui module of the Qt Toolkit.
7**
8** $QT_BEGIN_LICENSE:LGPL$
9** Commercial License Usage
10** Licensees holding valid commercial Qt licenses may use this file in
11** accordance with the commercial license agreement provided with the
12** Software or, alternatively, in accordance with the terms contained in
13** a written agreement between you and The Qt Company. For licensing terms
14** and conditions see https://www.qt.io/terms-conditions. For further
15** information use the contact form at https://www.qt.io/contact-us.
16**
17** GNU Lesser General Public License Usage
18** Alternatively, this file may be used under the terms of the GNU Lesser
19** General Public License version 3 as published by the Free Software
20** Foundation and appearing in the file LICENSE.LGPL3 included in the
21** packaging of this file. Please review the following information to
22** ensure the GNU Lesser General Public License version 3 requirements
23** will be met: https://www.gnu.org/licenses/lgpl-3.0.html.
24**
25** GNU General Public License Usage
26** Alternatively, this file may be used under the terms of the GNU
27** General Public License version 2.0 or (at your option) the GNU General
28** Public license version 3 or any later version approved by the KDE Free
29** Qt Foundation. The licenses are as published by the Free Software
30** Foundation and appearing in the file LICENSE.GPL2 and LICENSE.GPL3
31** included in the packaging of this file. Please review the following
32** information to ensure the GNU General Public License requirements will
33** be met: https://www.gnu.org/licenses/gpl-2.0.html and
34** https://www.gnu.org/licenses/gpl-3.0.html.
35**
36** $QT_END_LICENSE$
37**
38****************************************************************************/
39
40#ifndef QRGBA64_P_H
41#define QRGBA64_P_H
42
43//
44// W A R N I N G
45// -------------
46//
47// This file is not part of the Qt API. It exists purely as an
48// implementation detail. This header file may change from version to
49// version without notice, or even be removed.
50//
51// We mean it.
52//
53
54#include "qrgba64.h"
55#include "qdrawhelper_p.h"
56
57#include <QtCore/private/qsimd_p.h>
58#include <QtGui/private/qtguiglobal_p.h>
59
60QT_BEGIN_NAMESPACE
61
62inline QRgba64 combineAlpha256(QRgba64 rgba64, uint alpha256)
63{
64 return QRgba64::fromRgba64(red: rgba64.red(), green: rgba64.green(), blue: rgba64.blue(), alpha: (rgba64.alpha() * alpha256) >> 8);
65}
66
67inline QRgba64 multiplyAlpha65535(QRgba64 rgba64, uint alpha65535)
68{
69 return QRgba64::fromRgba64(red: qt_div_65535(x: rgba64.red() * alpha65535),
70 green: qt_div_65535(x: rgba64.green() * alpha65535),
71 blue: qt_div_65535(x: rgba64.blue() * alpha65535),
72 alpha: qt_div_65535(x: rgba64.alpha() * alpha65535));
73}
74
75#ifdef __SSE2__
76Q_ALWAYS_INLINE __m128i multiplyAlpha65535(__m128i rgba64, __m128i va)
77{
78 __m128i vs = rgba64;
79 vs = _mm_unpacklo_epi16(a: _mm_mullo_epi16(a: vs, b: va), b: _mm_mulhi_epu16(a: vs, b: va));
80 vs = _mm_add_epi32(a: vs, b: _mm_srli_epi32(a: vs, count: 16));
81 vs = _mm_add_epi32(a: vs, b: _mm_set1_epi32(i: 0x8000));
82 vs = _mm_srai_epi32(a: vs, count: 16);
83 vs = _mm_packs_epi32(a: vs, b: _mm_setzero_si128());
84 return vs;
85}
86Q_ALWAYS_INLINE __m128i multiplyAlpha65535(__m128i rgba64, uint alpha65535)
87{
88 const __m128i va = _mm_shufflelo_epi16(_mm_cvtsi32_si128(alpha65535), _MM_SHUFFLE(0, 0, 0, 0));
89 return multiplyAlpha65535(rgba64, va);
90}
91#endif
92
93#if defined(__ARM_NEON__)
94Q_ALWAYS_INLINE uint16x4_t multiplyAlpha65535(uint16x4_t rgba64, uint16x4_t alpha65535)
95{
96 uint32x4_t vs32 = vmull_u16(rgba64, alpha65535); // vs = vs * alpha
97 vs32 = vsraq_n_u32(vs32, vs32, 16); // vs = vs + (vs >> 16)
98 return vrshrn_n_u32(vs32, 16); // vs = (vs + 0x8000) >> 16
99}
100Q_ALWAYS_INLINE uint16x4_t multiplyAlpha65535(uint16x4_t rgba64, uint alpha65535)
101{
102 uint32x4_t vs32 = vmull_n_u16(rgba64, alpha65535); // vs = vs * alpha
103 vs32 = vsraq_n_u32(vs32, vs32, 16); // vs = vs + (vs >> 16)
104 return vrshrn_n_u32(vs32, 16); // vs = (vs + 0x8000) >> 16
105}
106#endif
107
108template<typename T>
109inline T multiplyAlpha255(T rgba64, uint alpha255)
110{
111#if defined(__SSE2__) || defined(__ARM_NEON__)
112 return multiplyAlpha65535(rgba64, alpha255 * 257);
113#else
114 return QRgba64::fromRgba64(qt_div_255(rgba64.red() * alpha255),
115 qt_div_255(rgba64.green() * alpha255),
116 qt_div_255(rgba64.blue() * alpha255),
117 qt_div_255(rgba64.alpha() * alpha255));
118#endif
119}
120
121inline QRgba64 interpolate255(QRgba64 x, uint alpha1, QRgba64 y, uint alpha2)
122{
123 return QRgba64::fromRgba64(c: multiplyAlpha255(rgba64: x, alpha255: alpha1) + multiplyAlpha255(rgba64: y, alpha255: alpha2));
124}
125
126#if defined __SSE2__
127Q_ALWAYS_INLINE __m128i interpolate255(__m128i x, uint alpha1, __m128i y, uint alpha2)
128{
129 return _mm_add_epi32(a: multiplyAlpha255(rgba64: x, alpha255: alpha1), b: multiplyAlpha255(rgba64: y, alpha255: alpha2));
130}
131#endif
132
133#if defined __ARM_NEON__
134Q_ALWAYS_INLINE uint16x4_t interpolate255(uint16x4_t x, uint alpha1, uint16x4_t y, uint alpha2)
135{
136 return vadd_u16(multiplyAlpha255(x, alpha1), multiplyAlpha255(y, alpha2));
137}
138#endif
139
140inline QRgba64 interpolate65535(QRgba64 x, uint alpha1, QRgba64 y, uint alpha2)
141{
142 return QRgba64::fromRgba64(c: multiplyAlpha65535(rgba64: x, alpha65535: alpha1) + multiplyAlpha65535(rgba64: y, alpha65535: alpha2));
143}
144
145#if defined __SSE2__
146Q_ALWAYS_INLINE __m128i interpolate65535(__m128i x, uint alpha1, __m128i y, uint alpha2)
147{
148 return _mm_add_epi32(a: multiplyAlpha65535(rgba64: x, alpha65535: alpha1), b: multiplyAlpha65535(rgba64: y, alpha65535: alpha2));
149}
150// alpha2 below is const-ref because otherwise MSVC2015 complains that it can't 16-byte align the argument.
151Q_ALWAYS_INLINE __m128i interpolate65535(__m128i x, __m128i alpha1, __m128i y, const __m128i &alpha2)
152{
153 return _mm_add_epi32(a: multiplyAlpha65535(rgba64: x, va: alpha1), b: multiplyAlpha65535(rgba64: y, va: alpha2));
154}
155#endif
156
157#if defined __ARM_NEON__
158Q_ALWAYS_INLINE uint16x4_t interpolate65535(uint16x4_t x, uint alpha1, uint16x4_t y, uint alpha2)
159{
160 return vadd_u16(multiplyAlpha65535(x, alpha1), multiplyAlpha65535(y, alpha2));
161}
162Q_ALWAYS_INLINE uint16x4_t interpolate65535(uint16x4_t x, uint16x4_t alpha1, uint16x4_t y, uint16x4_t alpha2)
163{
164 return vadd_u16(multiplyAlpha65535(x, alpha1), multiplyAlpha65535(y, alpha2));
165}
166#endif
167
168inline QRgba64 addWithSaturation(QRgba64 a, QRgba64 b)
169{
170 return QRgba64::fromRgba64(red: qMin(a: a.red() + b.red(), b: 65535),
171 green: qMin(a: a.green() + b.green(), b: 65535),
172 blue: qMin(a: a.blue() + b.blue(), b: 65535),
173 alpha: qMin(a: a.alpha() + b.alpha(), b: 65535));
174}
175
176#if QT_COMPILER_SUPPORTS_HERE(SSE2)
177QT_FUNCTION_TARGET(SSE2)
178Q_ALWAYS_INLINE uint toArgb32(__m128i v)
179{
180 v = _mm_unpacklo_epi16(a: v, b: _mm_setzero_si128());
181 v = _mm_add_epi32(a: v, b: _mm_set1_epi32(i: 128));
182 v = _mm_sub_epi32(a: v, b: _mm_srli_epi32(a: v, count: 8));
183 v = _mm_srli_epi32(a: v, count: 8);
184 v = _mm_packs_epi32(a: v, b: v);
185 v = _mm_packus_epi16(a: v, b: v);
186 return _mm_cvtsi128_si32(a: v);
187}
188#elif defined __ARM_NEON__
189Q_ALWAYS_INLINE uint toArgb32(uint16x4_t v)
190{
191 v = vsub_u16(v, vrshr_n_u16(v, 8));
192 v = vrshr_n_u16(v, 8);
193 uint8x8_t v8 = vmovn_u16(vcombine_u16(v, v));
194 return vget_lane_u32(vreinterpret_u32_u8(v8), 0);
195}
196#endif
197
198Q_ALWAYS_INLINE uint toArgb32(QRgba64 rgba64)
199{
200#if defined __SSE2__
201 __m128i v = _mm_loadl_epi64(p: (const __m128i *)&rgba64);
202 v = _mm_shufflelo_epi16(v, _MM_SHUFFLE(3, 0, 1, 2));
203 return toArgb32(v);
204#elif defined __ARM_NEON__
205 uint16x4_t v = vreinterpret_u16_u64(vld1_u64(reinterpret_cast<const uint64_t *>(&rgba64)));
206#if Q_BYTE_ORDER == Q_LITTLE_ENDIAN
207 const uint8x8_t shuffleMask = { 4, 5, 2, 3, 0, 1, 6, 7 };
208 v = vreinterpret_u16_u8(vtbl1_u8(vreinterpret_u8_u16(v), shuffleMask));
209#else
210 v = vext_u16(v, v, 3);
211#endif
212 return toArgb32(v);
213#else
214 return rgba64.toArgb32();
215#endif
216}
217
218Q_ALWAYS_INLINE uint toRgba8888(QRgba64 rgba64)
219{
220#if defined __SSE2__
221 __m128i v = _mm_loadl_epi64(p: (const __m128i *)&rgba64);
222 return toArgb32(v);
223#elif defined __ARM_NEON__
224 uint16x4_t v = vreinterpret_u16_u64(vld1_u64(reinterpret_cast<const uint64_t *>(&rgba64)));
225 return toArgb32(v);
226#else
227 return ARGB2RGBA(toArgb32(rgba64));
228#endif
229}
230
231inline QRgba64 rgbBlend(QRgba64 d, QRgba64 s, uint rgbAlpha)
232{
233 QRgba64 blend;
234#if defined(__SSE2__)
235 __m128i vd = _mm_loadl_epi64(p: (const __m128i *)&d);
236 __m128i vs = _mm_loadl_epi64(p: (const __m128i *)&s);
237 __m128i va = _mm_cvtsi32_si128(a: rgbAlpha);
238 va = _mm_unpacklo_epi8(a: va, b: va);
239 va = _mm_shufflelo_epi16(va, _MM_SHUFFLE(3, 0, 1, 2));
240 __m128i vb = _mm_xor_si128(a: _mm_set1_epi16(w: -1), b: va);
241
242 vs = _mm_unpacklo_epi16(a: _mm_mullo_epi16(a: vs, b: va), b: _mm_mulhi_epu16(a: vs, b: va));
243 vd = _mm_unpacklo_epi16(a: _mm_mullo_epi16(a: vd, b: vb), b: _mm_mulhi_epu16(a: vd, b: vb));
244 vd = _mm_add_epi32(a: vd, b: vs);
245 vd = _mm_add_epi32(a: vd, b: _mm_srli_epi32(a: vd, count: 16));
246 vd = _mm_add_epi32(a: vd, b: _mm_set1_epi32(i: 0x8000));
247 vd = _mm_srai_epi32(a: vd, count: 16);
248 vd = _mm_packs_epi32(a: vd, b: _mm_setzero_si128());
249
250 _mm_storel_epi64(p: (__m128i *)&blend, a: vd);
251#elif defined(__ARM_NEON__)
252 uint16x4_t vd = vreinterpret_u16_u64(vmov_n_u64(d));
253 uint16x4_t vs = vreinterpret_u16_u64(vmov_n_u64(s));
254 uint8x8_t va8 = vreinterpret_u8_u32(vmov_n_u32(ARGB2RGBA(rgbAlpha)));
255 uint16x4_t va = vreinterpret_u16_u8(vzip_u8(va8, va8).val[0]);
256 uint16x4_t vb = vdup_n_u16(0xffff);
257 vb = vsub_u16(vb, va);
258
259 uint32x4_t vs32 = vmull_u16(vs, va);
260 uint32x4_t vd32 = vmull_u16(vd, vb);
261 vd32 = vaddq_u32(vd32, vs32);
262 vd32 = vsraq_n_u32(vd32, vd32, 16);
263 vd = vrshrn_n_u32(vd32, 16);
264 vst1_u64(reinterpret_cast<uint64_t *>(&blend), vreinterpret_u64_u16(vd));
265#else
266 const int mr = qRed(rgbAlpha);
267 const int mg = qGreen(rgbAlpha);
268 const int mb = qBlue(rgbAlpha);
269 blend = qRgba64(qt_div_255(s.red() * mr + d.red() * (255 - mr)),
270 qt_div_255(s.green() * mg + d.green() * (255 - mg)),
271 qt_div_255(s.blue() * mb + d.blue() * (255 - mb)),
272 s.alpha());
273#endif
274 return blend;
275}
276
277static Q_ALWAYS_INLINE void blend_pixel(QRgba64 &dst, QRgba64 src)
278{
279 if (src.isOpaque())
280 dst = src;
281 else if (!src.isTransparent())
282 dst = src + multiplyAlpha65535(rgba64: dst, alpha65535: 65535 - src.alpha());
283}
284
285static Q_ALWAYS_INLINE void blend_pixel(QRgba64 &dst, QRgba64 src, const int const_alpha)
286{
287 if (const_alpha == 255)
288 return blend_pixel(dst, src);
289 if (!src.isTransparent()) {
290 src = multiplyAlpha255(rgba64: src, alpha255: const_alpha);
291 dst = src + multiplyAlpha65535(rgba64: dst, alpha65535: 65535 - src.alpha());
292 }
293}
294
295QT_END_NAMESPACE
296
297#endif // QRGBA64_P_H
298

source code of qtbase/src/gui/painting/qrgba64_p.h