1/****************************************************************************
2**
3** Copyright (C) 2016 Paul Lemire <paul.lemire350@gmail.com>
4** Contact: https://www.qt.io/licensing/
5**
6** This file is part of the Qt3D module of the Qt Toolkit.
7**
8** $QT_BEGIN_LICENSE:LGPL$
9** Commercial License Usage
10** Licensees holding valid commercial Qt licenses may use this file in
11** accordance with the commercial license agreement provided with the
12** Software or, alternatively, in accordance with the terms contained in
13** a written agreement between you and The Qt Company. For licensing terms
14** and conditions see https://www.qt.io/terms-conditions. For further
15** information use the contact form at https://www.qt.io/contact-us.
16**
17** GNU Lesser General Public License Usage
18** Alternatively, this file may be used under the terms of the GNU Lesser
19** General Public License version 3 as published by the Free Software
20** Foundation and appearing in the file LICENSE.LGPL3 included in the
21** packaging of this file. Please review the following information to
22** ensure the GNU Lesser General Public License version 3 requirements
23** will be met: https://www.gnu.org/licenses/lgpl-3.0.html.
24**
25** GNU General Public License Usage
26** Alternatively, this file may be used under the terms of the GNU
27** General Public License version 2.0 or (at your option) the GNU General
28** Public license version 3 or any later version approved by the KDE Free
29** Qt Foundation. The licenses are as published by the Free Software
30** Foundation and appearing in the file LICENSE.GPL2 and LICENSE.GPL3
31** included in the packaging of this file. Please review the following
32** information to ensure the GNU General Public License requirements will
33** be met: https://www.gnu.org/licenses/gpl-2.0.html and
34** https://www.gnu.org/licenses/gpl-3.0.html.
35**
36** $QT_END_LICENSE$
37**
38****************************************************************************/
39
40#ifndef QT3DCORE_VECTOR4D_SSE_P_H
41#define QT3DCORE_VECTOR4D_SSE_P_H
42
43//
44// W A R N I N G
45// -------------
46//
47// This file is not part of the Qt3D API. It exists purely as an
48// implementation detail. This header file may change from version to
49// version without notice, or even be removed.
50//
51// We mean it.
52//
53
54#include <Qt3DCore/private/vector3d_p.h>
55#include <QtGui/qvector4d.h>
56
57#ifdef QT_COMPILER_SUPPORTS_SSE2
58
59QT_BEGIN_NAMESPACE
60
61namespace Qt3DCore {
62
// Forward declarations: both matrix classes are only named in the friend
// declarations of Vector4D_SSE below, so their definitions are not needed here.
class Matrix4x4_SSE;
class Matrix4x4_AVX2;
65
66class Vector4D_SSE
67{
68public:
69 Q_ALWAYS_INLINE Vector4D_SSE()
70 : m_xyzw(_mm_setzero_ps())
71 {
72 }
73
74 explicit Q_ALWAYS_INLINE Vector4D_SSE(Qt::Initialization) {}
75
76 explicit Q_ALWAYS_INLINE Vector4D_SSE(float x, float y, float z, float w)
77 : m_xyzw(_mm_set_ps(z: w, y: z, x: y, w: x))
78 {
79 }
80
81 explicit Q_ALWAYS_INLINE Vector4D_SSE(QVector4D v)
82 : m_xyzw(_mm_set_ps(z: v.w(), y: v.z(), x: v.y(), w: v.x()))
83 {
84 }
85
86 explicit Q_ALWAYS_INLINE Vector4D_SSE(const Vector3D_SSE &vec3, float w = 0.0f)
87 : m_xyzw(vec3.m_xyzw)
88 {
89 setW(w);
90 }
91
92 explicit Q_ALWAYS_INLINE Vector4D_SSE(QVector3D v, float w = 0.0f)
93 : m_xyzw(_mm_set_ps(z: w, y: v.z(), x: v.y(), w: v.x()))
94 {
95 }
96
97 Q_ALWAYS_INLINE Vector4D_SSE &operator+=(Vector4D_SSE vector)
98 {
99 m_xyzw = _mm_add_ps(a: m_xyzw, b: vector.m_xyzw);
100 return *this;
101 }
102
103 Q_ALWAYS_INLINE Vector4D_SSE &operator-=(Vector4D_SSE vector)
104 {
105 m_xyzw = _mm_sub_ps(a: m_xyzw, b: vector.m_xyzw);
106 return *this;
107 }
108
109 Q_ALWAYS_INLINE Vector4D_SSE &operator*=(Vector4D_SSE vector)
110 {
111 m_xyzw = _mm_mul_ps(a: m_xyzw, b: vector.m_xyzw);
112 return *this;
113 }
114
115 Q_ALWAYS_INLINE Vector4D_SSE &operator/=(Vector4D_SSE vector)
116 {
117 m_xyzw = _mm_div_ps(a: m_xyzw, b: vector.m_xyzw);
118 return *this;
119 }
120
121 Q_ALWAYS_INLINE Vector4D_SSE &operator*=(float factor)
122 {
123 m_xyzw = _mm_mul_ps(a: m_xyzw, b: _mm_set1_ps(w: factor));
124 return *this;
125 }
126
127 Q_ALWAYS_INLINE Vector4D_SSE &operator/=(float factor)
128 {
129 m_xyzw = _mm_div_ps(a: m_xyzw, b: _mm_set1_ps(w: factor));
130 return *this;
131 }
132
133 Q_ALWAYS_INLINE bool operator==(Vector4D_SSE other) const
134 {
135 // 0b1111 == 0xf
136 return (_mm_movemask_ps(a: _mm_cmpeq_ps(a: m_xyzw, b: other.m_xyzw)) == 0xf);
137 }
138
139 Q_ALWAYS_INLINE bool operator!=(Vector4D_SSE other) const
140 {
141 return !(*this == other);
142 }
143
144 Q_ALWAYS_INLINE QVector4D toQVector4D() const
145 {
146 return QVector4D(x(), y(), z(), w());
147 }
148
149 // TODO: Uncomment when we introduce Vector3D_SSE
150 //Q_ALWAYS_INLINE Vector3D_SSE toVector3D() const { return Vector3D_SSE(*this); }
151
152 Q_ALWAYS_INLINE float lengthSquared() const
153 {
154 return dotProduct(a: *this, b: *this);
155 }
156
157 Q_ALWAYS_INLINE float length() const
158 {
159 return sqrt(x: dotProduct(a: *this, b: *this));
160 }
161
162 Q_ALWAYS_INLINE void normalize()
163 {
164 const float len = length();
165 m_xyzw = _mm_div_ps(a: m_xyzw, b: _mm_set_ps1(w: len));
166 }
167
168 Q_ALWAYS_INLINE Vector4D_SSE normalized() const
169 {
170 Vector4D_SSE v = *this;
171 v.normalize();
172 return v;
173 }
174
175 Q_ALWAYS_INLINE bool isNull() const
176 {
177 // 0b1111 == 0xf
178 return _mm_movemask_ps(a: _mm_cmpeq_ps(a: m_xyzw, b: _mm_setzero_ps())) == 0xf;
179 }
180
181 Q_ALWAYS_INLINE float x() const { return _mm_cvtss_f32(a: m_xyzw); }
182
183 Q_ALWAYS_INLINE float y() const
184 {
185 // 0b01010101 = 0x55
186 return _mm_cvtss_f32(_mm_shuffle_ps(m_xyzw, m_xyzw, 0x55));
187 }
188
189 Q_ALWAYS_INLINE float z() const
190 {
191 // 0b10101010 = 0xaa
192 return _mm_cvtss_f32(a: _mm_unpackhi_ps(a: m_xyzw, b: m_xyzw));
193 }
194
195 Q_ALWAYS_INLINE float w() const
196 {
197 // 0b11111111 = 0xff
198 return _mm_cvtss_f32(_mm_shuffle_ps(m_xyzw, m_xyzw, 0xff));
199 }
200
201 Q_ALWAYS_INLINE void setX(float x)
202 {
203 m_xyzw = _mm_move_ss(a: m_xyzw, b: _mm_set_ss(w: x));
204 }
205
206 Q_ALWAYS_INLINE void setY(float y)
207 {
208 // m_xyzw = a, b, c, d
209
210 // y, y, y, y
211 const __m128 yVec = _mm_set_ps1(w: y);
212
213 // y, y, a, a
214 // 0b00000000 == 0x0
215 const __m128 yaVec = _mm_shuffle_ps(yVec, m_xyzw, 0x0);
216
217 // a, y, c, d
218 // 0b11100010 == 0xe2
219 m_xyzw = _mm_shuffle_ps(yaVec, m_xyzw, 0xe2);
220 }
221
222 Q_ALWAYS_INLINE void setZ(float z)
223 {
224 // m_xyzw = a, b, c, d
225
226 // z, z, z, z
227 const __m128 zVec = _mm_set_ps1(w: z);
228
229 // z, z, d, d
230 // 0b11110000 == 0xf0
231 const __m128 zdVec = _mm_shuffle_ps(zVec, m_xyzw, 0xf0);
232
233 // a, b, z, d
234 // 0b10000100 == 0x84
235 m_xyzw = _mm_shuffle_ps(m_xyzw, zdVec, 0x84);
236 }
237
238 Q_ALWAYS_INLINE void setW(float w)
239 {
240#ifdef __SSE4_1__
241 const __m128 wVec = _mm_set_ss(w);
242 // insert element 0 of wVec into position 3 in vec3, don't zero anything
243 m_xyzw = _mm_insert_ps(m_xyzw, wVec, 0x30);
244#else
245 // m_xyzw = a, b, c, d
246
247 // w, w, w, w
248 const __m128 wVec = _mm_set_ps1(w: w);
249
250 // c, c, w, w
251 const __m128 cwVec = _mm_shuffle_ps(m_xyzw, wVec, _MM_SHUFFLE(0, 0, 2, 2));
252
253 // a, b, c, w
254 m_xyzw = _mm_shuffle_ps(m_xyzw, cwVec, _MM_SHUFFLE(2, 0, 1, 0));
255#endif
256 }
257
258 Q_ALWAYS_INLINE float operator[](int idx) const
259 {
260 Q_DECL_ALIGN(16) float vec[4];
261 _mm_store_ps(p: vec, a: m_xyzw);
262 return vec[idx];
263 }
264
265 struct DigitWrapper
266 {
267 explicit DigitWrapper(int idx, Vector4D_SSE *vec)
268 : m_vec(vec)
269 , m_idx(idx)
270 {}
271
272 operator float() const
273 {
274 switch (m_idx) {
275 case 0:
276 return m_vec->x();
277 case 1:
278 return m_vec->y();
279 case 2:
280 return m_vec->z();
281 case 3:
282 return m_vec->w();
283 default:
284 Q_UNREACHABLE();
285 return 0.0f;
286 }
287 }
288 void operator =(float value)
289 {
290 switch (m_idx) {
291 case 0:
292 m_vec->setX(value);
293 break;
294 case 1:
295 m_vec->setY(value);
296 break;
297 case 2:
298 m_vec->setZ(value);
299 break;
300 case 3:
301 m_vec->setW(value);
302 break;
303 default:
304 Q_UNREACHABLE();
305 }
306 }
307
308 private:
309 Vector4D_SSE *m_vec;
310 const int m_idx;
311 };
312
313 Q_ALWAYS_INLINE DigitWrapper operator[](int idx)
314 {
315 return DigitWrapper(idx, this);
316 }
317
318 static Q_ALWAYS_INLINE float dotProduct(Vector4D_SSE a, Vector4D_SSE b)
319 {
320#if defined(__SSE4_1__)
321 // 0b11111111 = 0xff
322 return _mm_cvtss_f32(_mm_dp_ps(a.m_xyzw, b.m_xyzw, 0xff));
323#elif defined(__SSE3__)
324 const __m128 mult = _mm_mul_ps(a.m_xyzw, b.m_xyzw);
325 // a + b, c + d, a + d, c + d
326 const __m128 partialSum = _mm_hadd_ps(mult, mult);
327 // c + d, ......
328 // 0x00000001 =
329 const __m128 partialSumShuffle = _mm_shuffle_ps(partialSum, partialSum, 0x1);
330 return _mm_cvtss_f32(_mm_hadd_ps(partialSum, partialSumShuffle));
331#else
332 const __m128 mult = _mm_mul_ps(a: a.m_xyzw, b: b.m_xyzw);
333 // (multX, multY, 0, 0) + (multZ, multW, 0, 0) -> (multX + multZ, multY + multW, 0, 0)
334 // 0b00001110 == 0xe
335 const __m128 shuffled = _mm_shuffle_ps(mult, mult, 0xe);
336 __m128 result = _mm_add_ps(a: shuffled, b: mult);
337 // (multX + multZ, 0, 0, 0) + (multY + multW, 0, 0, 0);
338 // 0b00000001 == 0x1
339 const __m128 shuffled2 = _mm_shuffle_ps(result, result, 0x1);
340 result = _mm_add_ps(a: result, b: shuffled2);
341 return _mm_cvtss_f32(a: result);
342#endif
343 }
344
345 friend class Matrix4x4_SSE;
346
347#ifdef __AVX2__
348 friend class Matrix4x4_AVX2;
349 friend Vector4D_SSE operator*(const Vector4D_SSE &vector, const Matrix4x4_AVX2 &matrix);
350 friend Vector4D_SSE operator*(const Matrix4x4_AVX2 &matrix, const Vector4D_SSE &vector);
351#endif
352
353 friend class Vector3D_SSE;
354 friend Vector4D_SSE operator*(const Vector4D_SSE &vector, const Matrix4x4_SSE &matrix);
355 friend Vector4D_SSE operator*(const Matrix4x4_SSE &matrix, const Vector4D_SSE &vector);
356
357 friend Q_ALWAYS_INLINE const Vector4D_SSE operator+(Vector4D_SSE v1, Vector4D_SSE v2) { return v1 += v2; }
358 friend Q_ALWAYS_INLINE const Vector4D_SSE operator-(Vector4D_SSE v1, Vector4D_SSE v2) { return v1 -= v2; }
359 friend Q_ALWAYS_INLINE const Vector4D_SSE operator*(float factor, Vector4D_SSE vector) { return vector *= factor; }
360 friend Q_ALWAYS_INLINE const Vector4D_SSE operator*(Vector4D_SSE vector, float factor) { return vector *= factor; }
361 friend Q_ALWAYS_INLINE const Vector4D_SSE operator*(Vector4D_SSE v1, Vector4D_SSE v2) { return v1 *= v2; }
362 friend Q_ALWAYS_INLINE const Vector4D_SSE operator-(Vector4D_SSE vector)
363 {
364 Vector4D_SSE c(Qt::Uninitialized);
365
366 c.m_xyzw = _mm_xor_ps(a: vector.m_xyzw, b: _mm_set1_ps(w: -0.0f));
367
368 return c;
369 }
370
371 friend Q_ALWAYS_INLINE const Vector4D_SSE operator/(Vector4D_SSE vector, float divisor) { return vector /= divisor; }
372 friend Q_ALWAYS_INLINE const Vector4D_SSE operator/(Vector4D_SSE vector, Vector4D_SSE divisor) { return vector /= divisor; }
373
374 friend Q_3DCORE_PRIVATE_EXPORT QDebug operator<<(QDebug dbg, const Vector4D_SSE &v);
375 friend Q_ALWAYS_INLINE bool qFuzzyCompare(const Vector4D_SSE& v1, const Vector4D_SSE& v2)
376 {
377 return ::qFuzzyCompare(p1: v1.x(), p2: v2.x()) &&
378 ::qFuzzyCompare(p1: v1.y(), p2: v2.y()) &&
379 ::qFuzzyCompare(p1: v1.z(), p2: v2.z()) &&
380 ::qFuzzyCompare(p1: v1.w(), p2: v2.w());
381 }
382
383private:
384 // Q_DECL_ALIGN(16) float m[4];// for SSE support
385 __m128 m_xyzw;
386};
387
388} // Qt3DCore
389
// Flags Vector4D_SSE as Q_PRIMITIVE_TYPE for Qt's type-info machinery.
Q_DECLARE_TYPEINFO(Qt3DCore::Vector4D_SSE, Q_PRIMITIVE_TYPE);

QT_END_NAMESPACE

// Registers the type with the Qt meta-type system; issued after
// QT_END_NAMESPACE, using the fully qualified class name.
Q_DECLARE_METATYPE(Qt3DCore::Vector4D_SSE)
395
396#endif // QT_COMPILER_SUPPORTS_SSE2
397
398#endif // QT3DCORE_VECTOR4D_SSE_P_H
399

source code of qt3d/src/core/transforms/vector4d_sse_p.h