1// Copyright (C) 2016 Paul Lemire <paul.lemire350@gmail.com>
2// SPDX-License-Identifier: LicenseRef-Qt-Commercial OR LGPL-3.0-only OR GPL-2.0-only OR GPL-3.0-only
3
4#ifndef QT3DCORE_VECTOR4D_SSE_P_H
5#define QT3DCORE_VECTOR4D_SSE_P_H
6
7//
8// W A R N I N G
9// -------------
10//
11// This file is not part of the Qt3D API. It exists purely as an
12// implementation detail. This header file may change from version to
13// version without notice, or even be removed.
14//
15// We mean it.
16//
17
#include <Qt3DCore/private/vector3d_p.h>
#include <QtGui/qvector4d.h>

#include <cmath>
20
21#ifdef __SSE2__
22
23QT_BEGIN_NAMESPACE
24
25namespace Qt3DCore {
26
27class Matrix4x4_SSE;
28
29class Vector4D_SSE
30{
31public:
32 Q_ALWAYS_INLINE Vector4D_SSE()
33 : m_xyzw(_mm_setzero_ps())
34 {
35 }
36
37 explicit Q_ALWAYS_INLINE Vector4D_SSE(Qt::Initialization) {}
38
39 explicit Q_ALWAYS_INLINE Vector4D_SSE(float x, float y, float z, float w)
40 : m_xyzw(_mm_set_ps(z: w, y: z, x: y, w: x))
41 {
42 }
43
44 explicit Q_ALWAYS_INLINE Vector4D_SSE(QVector4D v)
45 : m_xyzw(_mm_set_ps(z: v.w(), y: v.z(), x: v.y(), w: v.x()))
46 {
47 }
48
49 explicit Q_ALWAYS_INLINE Vector4D_SSE(const Vector3D_SSE &vec3, float w = 0.0f)
50 : m_xyzw(vec3.m_xyzw)
51 {
52 setW(w);
53 }
54
55 explicit Q_ALWAYS_INLINE Vector4D_SSE(QVector3D v, float w = 0.0f)
56 : m_xyzw(_mm_set_ps(z: w, y: v.z(), x: v.y(), w: v.x()))
57 {
58 }
59
60 Q_ALWAYS_INLINE Vector4D_SSE &operator+=(Vector4D_SSE vector)
61 {
62 m_xyzw = _mm_add_ps(a: m_xyzw, b: vector.m_xyzw);
63 return *this;
64 }
65
66 Q_ALWAYS_INLINE Vector4D_SSE &operator-=(Vector4D_SSE vector)
67 {
68 m_xyzw = _mm_sub_ps(a: m_xyzw, b: vector.m_xyzw);
69 return *this;
70 }
71
72 Q_ALWAYS_INLINE Vector4D_SSE &operator*=(Vector4D_SSE vector)
73 {
74 m_xyzw = _mm_mul_ps(a: m_xyzw, b: vector.m_xyzw);
75 return *this;
76 }
77
78 Q_ALWAYS_INLINE Vector4D_SSE &operator/=(Vector4D_SSE vector)
79 {
80 m_xyzw = _mm_div_ps(a: m_xyzw, b: vector.m_xyzw);
81 return *this;
82 }
83
84 Q_ALWAYS_INLINE Vector4D_SSE &operator*=(float factor)
85 {
86 m_xyzw = _mm_mul_ps(a: m_xyzw, b: _mm_set1_ps(w: factor));
87 return *this;
88 }
89
90 Q_ALWAYS_INLINE Vector4D_SSE &operator/=(float factor)
91 {
92 m_xyzw = _mm_div_ps(a: m_xyzw, b: _mm_set1_ps(w: factor));
93 return *this;
94 }
95
96 Q_ALWAYS_INLINE bool operator==(Vector4D_SSE other) const
97 {
98 // 0b1111 == 0xf
99 return (_mm_movemask_ps(a: _mm_cmpeq_ps(a: m_xyzw, b: other.m_xyzw)) == 0xf);
100 }
101
102 Q_ALWAYS_INLINE bool operator!=(Vector4D_SSE other) const
103 {
104 return !(*this == other);
105 }
106
107 Q_ALWAYS_INLINE QVector4D toQVector4D() const
108 {
109 return QVector4D(x(), y(), z(), w());
110 }
111
112 // TODO: Uncomment when we introduce Vector3D_SSE
113 //Q_ALWAYS_INLINE Vector3D_SSE toVector3D() const { return Vector3D_SSE(*this); }
114
115 Q_ALWAYS_INLINE float lengthSquared() const
116 {
117 return dotProduct(a: *this, b: *this);
118 }
119
120 Q_ALWAYS_INLINE float length() const
121 {
122 return sqrt(x: dotProduct(a: *this, b: *this));
123 }
124
125 Q_ALWAYS_INLINE void normalize()
126 {
127 const float len = length();
128 m_xyzw = _mm_div_ps(a: m_xyzw, b: _mm_set_ps1(w: len));
129 }
130
131 Q_ALWAYS_INLINE Vector4D_SSE normalized() const
132 {
133 Vector4D_SSE v = *this;
134 v.normalize();
135 return v;
136 }
137
138 Q_ALWAYS_INLINE bool isNull() const
139 {
140 // 0b1111 == 0xf
141 return _mm_movemask_ps(a: _mm_cmpeq_ps(a: m_xyzw, b: _mm_setzero_ps())) == 0xf;
142 }
143
144 Q_ALWAYS_INLINE float x() const { return _mm_cvtss_f32(a: m_xyzw); }
145
146 Q_ALWAYS_INLINE float y() const
147 {
148 // 0b01010101 = 0x55
149 return _mm_cvtss_f32(_mm_shuffle_ps(m_xyzw, m_xyzw, 0x55));
150 }
151
152 Q_ALWAYS_INLINE float z() const
153 {
154 // 0b10101010 = 0xaa
155 return _mm_cvtss_f32(a: _mm_unpackhi_ps(a: m_xyzw, b: m_xyzw));
156 }
157
158 Q_ALWAYS_INLINE float w() const
159 {
160 // 0b11111111 = 0xff
161 return _mm_cvtss_f32(_mm_shuffle_ps(m_xyzw, m_xyzw, 0xff));
162 }
163
164 Q_ALWAYS_INLINE void setX(float x)
165 {
166 m_xyzw = _mm_move_ss(a: m_xyzw, b: _mm_set_ss(w: x));
167 }
168
169 Q_ALWAYS_INLINE void setY(float y)
170 {
171 // m_xyzw = a, b, c, d
172
173 // y, y, y, y
174 const __m128 yVec = _mm_set_ps1(w: y);
175
176 // y, y, a, a
177 // 0b00000000 == 0x0
178 const __m128 yaVec = _mm_shuffle_ps(yVec, m_xyzw, 0x0);
179
180 // a, y, c, d
181 // 0b11100010 == 0xe2
182 m_xyzw = _mm_shuffle_ps(yaVec, m_xyzw, 0xe2);
183 }
184
185 Q_ALWAYS_INLINE void setZ(float z)
186 {
187 // m_xyzw = a, b, c, d
188
189 // z, z, z, z
190 const __m128 zVec = _mm_set_ps1(w: z);
191
192 // z, z, d, d
193 // 0b11110000 == 0xf0
194 const __m128 zdVec = _mm_shuffle_ps(zVec, m_xyzw, 0xf0);
195
196 // a, b, z, d
197 // 0b10000100 == 0x84
198 m_xyzw = _mm_shuffle_ps(m_xyzw, zdVec, 0x84);
199 }
200
201 Q_ALWAYS_INLINE void setW(float w)
202 {
203#ifdef __SSE4_1__
204 const __m128 wVec = _mm_set_ss(w);
205 // insert element 0 of wVec into position 3 in vec3, don't zero anything
206 m_xyzw = _mm_insert_ps(m_xyzw, wVec, 0x30);
207#else
208 // m_xyzw = a, b, c, d
209
210 // w, w, w, w
211 const __m128 wVec = _mm_set_ps1(w: w);
212
213 // c, c, w, w
214 const __m128 cwVec = _mm_shuffle_ps(m_xyzw, wVec, _MM_SHUFFLE(0, 0, 2, 2));
215
216 // a, b, c, w
217 m_xyzw = _mm_shuffle_ps(m_xyzw, cwVec, _MM_SHUFFLE(2, 0, 1, 0));
218#endif
219 }
220
221 Q_ALWAYS_INLINE float operator[](int idx) const
222 {
223 Q_DECL_ALIGN(16) float vec[4];
224 _mm_store_ps(p: vec, a: m_xyzw);
225 return vec[idx];
226 }
227
228 struct DigitWrapper
229 {
230 explicit DigitWrapper(int idx, Vector4D_SSE *vec)
231 : m_vec(vec)
232 , m_idx(idx)
233 {}
234
235 operator float() const
236 {
237 switch (m_idx) {
238 case 0:
239 return m_vec->x();
240 case 1:
241 return m_vec->y();
242 case 2:
243 return m_vec->z();
244 case 3:
245 return m_vec->w();
246 default:
247 Q_UNREACHABLE_RETURN(0.0f);
248 }
249 }
250 void operator =(float value)
251 {
252 switch (m_idx) {
253 case 0:
254 m_vec->setX(value);
255 break;
256 case 1:
257 m_vec->setY(value);
258 break;
259 case 2:
260 m_vec->setZ(value);
261 break;
262 case 3:
263 m_vec->setW(value);
264 break;
265 default:
266 Q_UNREACHABLE();
267 }
268 }
269
270 private:
271 Vector4D_SSE *m_vec;
272 const int m_idx;
273 };
274
275 Q_ALWAYS_INLINE DigitWrapper operator[](int idx)
276 {
277 return DigitWrapper(idx, this);
278 }
279
280 static Q_ALWAYS_INLINE float dotProduct(Vector4D_SSE a, Vector4D_SSE b)
281 {
282#if defined(__SSE4_1__)
283 // 0b11111111 = 0xff
284 return _mm_cvtss_f32(_mm_dp_ps(a.m_xyzw, b.m_xyzw, 0xff));
285#elif defined(__SSE3__)
286 const __m128 mult = _mm_mul_ps(a.m_xyzw, b.m_xyzw);
287 // a + b, c + d, a + d, c + d
288 const __m128 partialSum = _mm_hadd_ps(mult, mult);
289 // c + d, ......
290 // 0x00000001 =
291 const __m128 partialSumShuffle = _mm_shuffle_ps(partialSum, partialSum, 0x1);
292 return _mm_cvtss_f32(_mm_hadd_ps(partialSum, partialSumShuffle));
293#else
294 const __m128 mult = _mm_mul_ps(a: a.m_xyzw, b: b.m_xyzw);
295 // (multX, multY, 0, 0) + (multZ, multW, 0, 0) -> (multX + multZ, multY + multW, 0, 0)
296 // 0b00001110 == 0xe
297 const __m128 shuffled = _mm_shuffle_ps(mult, mult, 0xe);
298 __m128 result = _mm_add_ps(a: shuffled, b: mult);
299 // (multX + multZ, 0, 0, 0) + (multY + multW, 0, 0, 0);
300 // 0b00000001 == 0x1
301 const __m128 shuffled2 = _mm_shuffle_ps(result, result, 0x1);
302 result = _mm_add_ps(a: result, b: shuffled2);
303 return _mm_cvtss_f32(a: result);
304#endif
305 }
306
307 friend class Matrix4x4_SSE;
308 friend class Vector3D_SSE;
309 friend Vector4D_SSE operator*(const Vector4D_SSE &vector, const Matrix4x4_SSE &matrix);
310 friend Vector4D_SSE operator*(const Matrix4x4_SSE &matrix, const Vector4D_SSE &vector);
311
312 friend Q_ALWAYS_INLINE const Vector4D_SSE operator+(Vector4D_SSE v1, Vector4D_SSE v2) { return v1 += v2; }
313 friend Q_ALWAYS_INLINE const Vector4D_SSE operator-(Vector4D_SSE v1, Vector4D_SSE v2) { return v1 -= v2; }
314 friend Q_ALWAYS_INLINE const Vector4D_SSE operator*(float factor, Vector4D_SSE vector) { return vector *= factor; }
315 friend Q_ALWAYS_INLINE const Vector4D_SSE operator*(Vector4D_SSE vector, float factor) { return vector *= factor; }
316 friend Q_ALWAYS_INLINE const Vector4D_SSE operator*(Vector4D_SSE v1, Vector4D_SSE v2) { return v1 *= v2; }
317 friend Q_ALWAYS_INLINE const Vector4D_SSE operator-(Vector4D_SSE vector)
318 {
319 Vector4D_SSE c(Qt::Uninitialized);
320
321 c.m_xyzw = _mm_xor_ps(a: vector.m_xyzw, b: _mm_set1_ps(w: -0.0f));
322
323 return c;
324 }
325
326 friend Q_ALWAYS_INLINE const Vector4D_SSE operator/(Vector4D_SSE vector, float divisor) { return vector /= divisor; }
327 friend Q_ALWAYS_INLINE const Vector4D_SSE operator/(Vector4D_SSE vector, Vector4D_SSE divisor) { return vector /= divisor; }
328
329 friend Q_3DCORE_PRIVATE_EXPORT QDebug operator<<(QDebug dbg, const Vector4D_SSE &v);
330 friend Q_ALWAYS_INLINE bool qFuzzyCompare(const Vector4D_SSE& v1, const Vector4D_SSE& v2)
331 {
332 return ::qFuzzyCompare(p1: v1.x(), p2: v2.x()) &&
333 ::qFuzzyCompare(p1: v1.y(), p2: v2.y()) &&
334 ::qFuzzyCompare(p1: v1.z(), p2: v2.z()) &&
335 ::qFuzzyCompare(p1: v1.w(), p2: v2.w());
336 }
337
338private:
339 // Q_DECL_ALIGN(16) float m[4];// for SSE support
340 __m128 m_xyzw;
341};
342
343} // Qt3DCore
344
345Q_DECLARE_TYPEINFO(Qt3DCore::Vector4D_SSE, Q_PRIMITIVE_TYPE);
346
347QT_END_NAMESPACE
348
349Q_DECLARE_METATYPE(Qt3DCore::Vector4D_SSE)
350
351#endif // __SSE2__
352
353#endif // QT3DCORE_VECTOR4D_SSE_P_H
354

// Source: qt3d/src/core/transforms/vector4d_sse_p.h