// Copyright (C) 2016 Paul Lemire <paul.lemire350@gmail.com>
// SPDX-License-Identifier: LicenseRef-Qt-Commercial OR LGPL-3.0-only OR GPL-2.0-only OR GPL-3.0-only
3
4#ifndef QT3DCORE_VECTOR3D_SSE_P_H
5#define QT3DCORE_VECTOR3D_SSE_P_H
6
//
//  W A R N I N G
//  -------------
//
// This file is not part of the Qt3D API.  It exists purely as an
// implementation detail.  This header file may change from version to
// version without notice, or even be removed.
//
// We mean it.
//
17
18#include <Qt3DCore/private/qt3dcore_global_p.h>
19#include <QtCore/private/qsimd_p.h>
20#include <QtCore/QtGlobal>
21#include <QtGui/qvector3d.h>
22#include <QDebug>
23#include <math.h>
24
25#ifdef __SSE2__
26
27QT_BEGIN_NAMESPACE
28
29namespace Qt3DCore {
30
31class Matrix4x4_SSE;
32class Vector4D_SSE;
33
34class Vector3D_SSE
35{
36public:
37
38 Q_ALWAYS_INLINE Vector3D_SSE()
39 : m_xyzw(_mm_setzero_ps())
40 {
41 }
42
43 explicit Q_ALWAYS_INLINE Vector3D_SSE(Qt::Initialization) {}
44
45 explicit Q_ALWAYS_INLINE Vector3D_SSE(float x, float y, float z)
46 : m_xyzw(_mm_set_ps(z: 0.0f, y: z, x: y, w: x))
47 {
48 }
49
50 explicit Q_ALWAYS_INLINE Vector3D_SSE(QVector3D v)
51 : m_xyzw(_mm_set_ps(z: 0.0f, y: v.z(), x: v.y(), w: v.x()))
52 {
53 }
54
55 explicit Q_3DCORE_PRIVATE_EXPORT Vector3D_SSE(const Vector4D_SSE &v);
56
57 Q_ALWAYS_INLINE Vector3D_SSE &operator+=(Vector3D_SSE vector)
58 {
59 m_xyzw = _mm_add_ps(a: m_xyzw, b: vector.m_xyzw);
60 return *this;
61 }
62
63 Q_ALWAYS_INLINE Vector3D_SSE &operator-=(Vector3D_SSE vector)
64 {
65 m_xyzw = _mm_sub_ps(a: m_xyzw, b: vector.m_xyzw);
66 return *this;
67 }
68
69 Q_ALWAYS_INLINE Vector3D_SSE &operator*=(Vector3D_SSE vector)
70 {
71 m_xyzw = _mm_mul_ps(a: m_xyzw, b: vector.m_xyzw);
72 return *this;
73 }
74
75 Q_ALWAYS_INLINE Vector3D_SSE &operator/=(Vector3D_SSE vector)
76 {
77 m_xyzw = _mm_div_ps(a: m_xyzw, b: vector.m_xyzw);
78 return *this;
79 }
80
81 Q_ALWAYS_INLINE Vector3D_SSE &operator*=(float factor)
82 {
83 m_xyzw = _mm_mul_ps(a: m_xyzw, b: _mm_set1_ps(w: factor));
84 return *this;
85 }
86
87 Q_ALWAYS_INLINE Vector3D_SSE &operator/=(float factor)
88 {
89 m_xyzw = _mm_div_ps(a: m_xyzw, b: _mm_set1_ps(w: factor));
90 return *this;
91 }
92
93 Q_ALWAYS_INLINE bool operator==(Vector3D_SSE other) const
94 {
95 // 0b111 == 0x7
96 return ((_mm_movemask_ps(a: _mm_cmpeq_ps(a: m_xyzw, b: other.m_xyzw)) & 0x7) == 0x7);
97 }
98
99 Q_ALWAYS_INLINE bool operator!=(Vector3D_SSE other) const
100 {
101 return !(*this == other);
102 }
103
104 Q_ALWAYS_INLINE QVector3D toQVector3D() const
105 {
106 return QVector3D(x(), y(), z());
107 }
108
109 Q_ALWAYS_INLINE float lengthSquared() const
110 {
111 return Qt3DCore::Vector3D_SSE::dotProduct(a: *this, b: *this);
112 }
113
114 Q_ALWAYS_INLINE float length() const
115 {
116 return sqrt(x: Qt3DCore::Vector3D_SSE::dotProduct(a: *this, b: *this));
117 }
118
119 Q_ALWAYS_INLINE float distanceToPoint(const Vector3D_SSE &point) const
120 {
121 return (*this - point).length();
122 }
123
124 Q_ALWAYS_INLINE void normalize()
125 {
126 const float len = length();
127 m_xyzw = _mm_div_ps(a: m_xyzw, b: _mm_set_ps1(w: len));
128 }
129
130 Q_ALWAYS_INLINE Vector3D_SSE normalized() const
131 {
132 Vector3D_SSE v = *this;
133 v.normalize();
134 return v;
135 }
136
137 Q_ALWAYS_INLINE bool isNull() const
138 {
139 // Ignore last bit
140 // 0b111 = 0x7
141 return ((_mm_movemask_ps(a: _mm_cmpeq_ps(a: m_xyzw, b: _mm_set_ps1(w: 0.0f))) & 0x7) == 0x7);
142 }
143
144 Q_3DCORE_PRIVATE_EXPORT Vector3D_SSE unproject(const Matrix4x4_SSE &modelView, const Matrix4x4_SSE &projection, const QRect &viewport) const;
145 Q_3DCORE_PRIVATE_EXPORT Vector3D_SSE project(const Matrix4x4_SSE &modelView, const Matrix4x4_SSE &projection, const QRect &viewport) const;
146
147 Q_ALWAYS_INLINE float x() const { return _mm_cvtss_f32(a: m_xyzw); }
148
149 Q_ALWAYS_INLINE float y() const
150 {
151 // 0b01010101 = 0x55
152 return _mm_cvtss_f32(_mm_shuffle_ps(m_xyzw, m_xyzw, 0x55));
153 }
154
155 Q_ALWAYS_INLINE float z() const
156 {
157 // 0b10101010 = 0xaa
158 return _mm_cvtss_f32(a: _mm_unpackhi_ps(a: m_xyzw, b: m_xyzw));
159 }
160
161 Q_ALWAYS_INLINE void setX(float x)
162 {
163 m_xyzw = _mm_move_ss(a: m_xyzw, b: _mm_set_ss(w: x));
164 }
165
166 Q_ALWAYS_INLINE void setY(float y)
167 {
168 // m_xyzw = a, b, c, d
169
170 // y, y, y, y
171 const __m128 yVec = _mm_set_ps1(w: y);
172
173 // y, y, a, a
174 // 0b00000000 == 0x0
175 const __m128 yaVec = _mm_shuffle_ps(yVec, m_xyzw, 0x0);
176
177 // a, y, c, d
178 // 0b11100010 == 0xe2
179 m_xyzw = _mm_shuffle_ps(yaVec, m_xyzw, 0xe2);
180 }
181
182 Q_ALWAYS_INLINE void setZ(float z)
183 {
184 // m_xyzw = a, b, c, d
185
186 // z, z, z, z
187 const __m128 zVec = _mm_set_ps1(w: z);
188
189 // z, z, d, d
190 // 0b11110000 == 0xf0
191 const __m128 zdVec = _mm_shuffle_ps(zVec, m_xyzw, 0xf0);
192
193 // a, b, z, d
194 // 0b10000100 == 0x84
195 m_xyzw = _mm_shuffle_ps(m_xyzw, zdVec, 0x84);
196 }
197
198 Q_ALWAYS_INLINE float operator[](int idx) const
199 {
200 switch (idx) {
201 case 0:
202 return x();
203 case 1:
204 return y();
205 case 2:
206 return z();
207 default:
208 Q_UNREACHABLE_RETURN(0.0f);
209 }
210 }
211
212 struct DigitWrapper
213 {
214 explicit DigitWrapper(int idx, Vector3D_SSE *vec)
215 : m_vec(vec)
216 , m_idx(idx)
217 {}
218
219 operator float() const
220 {
221 switch (m_idx) {
222 case 0:
223 return m_vec->x();
224 case 1:
225 return m_vec->y();
226 case 2:
227 return m_vec->z();
228 default:
229 Q_UNREACHABLE_RETURN(0.0f);
230 }
231 }
232
233 void operator =(float value)
234 {
235 switch (m_idx) {
236 case 0:
237 m_vec->setX(value);
238 break;
239 case 1:
240 m_vec->setY(value);
241 break;
242 case 2:
243 m_vec->setZ(value);
244 break;
245 default:
246 Q_UNREACHABLE();
247 }
248 }
249
250 private:
251 Vector3D_SSE *m_vec;
252 const int m_idx;
253 };
254
255 Q_ALWAYS_INLINE DigitWrapper operator[](int idx)
256 {
257 return DigitWrapper(idx, this);
258 }
259
260 static Q_ALWAYS_INLINE float dotProduct(Vector3D_SSE a, Vector3D_SSE b)
261 {
262#if defined(__SSE4_1__)
263 // 0b01111111 = 0x7f
264 return _mm_cvtss_f32(_mm_dp_ps(a.m_xyzw, b.m_xyzw, 0x7f));
265#elif defined(__SSE3__)
266 const __m128 mult = _mm_mul_ps(a.m_xyzw, b.m_xyzw);
267 // a + b, c + d, a + d, c + d
268 const __m128 partialSum = _mm_hadd_ps(mult, mult);
269 // c + d, ......
270 // 0x00000001 =
271 const __m128 partialSumShuffle = _mm_shuffle_ps(partialSum, partialSum, 0x1);
272 return _mm_cvtss_f32(_mm_hadd_ps(partialSum, partialSumShuffle));
273#else
274 const __m128 mult = _mm_mul_ps(a: a.m_xyzw, b: b.m_xyzw);
275
276 // (multX, 0, 0, 0) + (multY, 0, 0, 0) -> (multX + multY, 0, 0, 0)
277 // 0b11111101 == 0xfd
278 const __m128 shuffled = _mm_shuffle_ps(mult, mult, 0xfd);
279 // (multX + multY, 0, 0, 0) + (multZ, 0, 0, 0);
280 // 0b11111110 == 0xfe
281 const __m128 shuffled2 = _mm_shuffle_ps(mult, mult, 0xfe);
282 const __m128 result = _mm_add_ps(a: _mm_add_ps(a: shuffled, b: mult), b: shuffled2);
283 return _mm_cvtss_f32(a: result);
284#endif
285 }
286
287 static Q_ALWAYS_INLINE Vector3D_SSE crossProduct(Vector3D_SSE a, Vector3D_SSE b)
288 {
289 // a.y * b.z - a.z * b.y, a.z * b.x - a.x * b.z, a.x * b.y - a.y * b.x
290 // (a.y, a.z, a.z, a.x, a.x, a.y) (b.z, b.y, b.x, b.z, b.y, b.x)
291 // (a.y, a.z, a.x) * (b.z, b.x, b.y) - (a.z, a.x, a.y) (b.y, b.z, b.x)
292
293 // 0b11001001 == 0xc9
294 const __m128 a1 = _mm_shuffle_ps(a.m_xyzw, a.m_xyzw, 0xc9);
295 const __m128 b2 = _mm_shuffle_ps(b.m_xyzw, b.m_xyzw, 0xc9);
296 // 0b11010010 == 0xd2
297 const __m128 a2 = _mm_shuffle_ps(a.m_xyzw, a.m_xyzw, 0xd2);
298 const __m128 b1 = _mm_shuffle_ps(b.m_xyzw, b.m_xyzw, 0xd2);
299
300 Vector3D_SSE v(Qt::Uninitialized);
301 v.m_xyzw = _mm_sub_ps(a: _mm_mul_ps(a: a1, b: b1), b: _mm_mul_ps(a: a2, b: b2));
302 return v;
303 }
304
305 friend class Vector4D_SSE;
306 friend class Matrix4x4_SSE;
307 friend Vector3D_SSE operator*(const Vector3D_SSE &vector, const Matrix4x4_SSE &matrix);
308 friend Vector3D_SSE operator*(const Matrix4x4_SSE &matrix, const Vector3D_SSE &vector);
309
310 friend Q_ALWAYS_INLINE const Vector3D_SSE operator+(Vector3D_SSE v1, Vector3D_SSE v2) { return v1 += v2; }
311 friend Q_ALWAYS_INLINE const Vector3D_SSE operator-(Vector3D_SSE v1, Vector3D_SSE v2) { return v1 -= v2; }
312 friend Q_ALWAYS_INLINE const Vector3D_SSE operator*(float factor, Vector3D_SSE vector) { return vector *= factor; }
313 friend Q_ALWAYS_INLINE const Vector3D_SSE operator*(Vector3D_SSE vector, float factor) { return vector *= factor; }
314 friend Q_ALWAYS_INLINE const Vector3D_SSE operator*(Vector3D_SSE v1, Vector3D_SSE v2) { return v1 *= v2; }
315 friend Q_ALWAYS_INLINE const Vector3D_SSE operator-(Vector3D_SSE vector)
316 {
317 Vector3D_SSE c(Qt::Uninitialized);
318
319 c.m_xyzw = _mm_xor_ps(a: vector.m_xyzw, b: _mm_set1_ps(w: -0.0f));
320
321 return c;
322 }
323
324 friend Q_ALWAYS_INLINE const Vector3D_SSE operator/(Vector3D_SSE vector, float divisor) { return vector /= divisor; }
325 friend Q_ALWAYS_INLINE const Vector3D_SSE operator/(Vector3D_SSE vector, Vector3D_SSE divisor) { return vector /= divisor; }
326
327 friend Q_3DCORE_PRIVATE_EXPORT QDebug operator<<(QDebug dbg, const Vector3D_SSE &v);
328 friend Q_ALWAYS_INLINE bool qFuzzyCompare(const Vector3D_SSE& v1, const Vector3D_SSE& v2)
329 {
330 return ::qFuzzyCompare(p1: v1.x(), p2: v2.x()) &&
331 ::qFuzzyCompare(p1: v1.y(), p2: v2.y()) &&
332 ::qFuzzyCompare(p1: v1.z(), p2: v2.z());
333 }
334
335private:
336 // Q_DECL_ALIGN(16) float m[4];// for SSE support
337 __m128 m_xyzw;
338};
339
340} // Qt3DCore
341
342Q_DECLARE_TYPEINFO(Qt3DCore::Vector3D_SSE, Q_PRIMITIVE_TYPE);
343
344QT_END_NAMESPACE
345
346Q_DECLARE_METATYPE(Qt3DCore::Vector3D_SSE)
347
348#endif // __SSE2__
349
350#endif // QT3DCORE_VECTOR3D_SSE_P_H
351

// Source: qt3d/src/core/transforms/vector3d_sse_p.h