matrix4x4_sse_p.h source code [qt3d/src/core/transforms/matrix4x4_sse_p.h]

1	/****************************************************************************
2	**
3	** Copyright (C) 2016 Paul Lemire <paul.lemire350@gmail.com>
4	** Contact: https://www.qt.io/licensing/
5	**
6	** This file is part of the Qt3D module of the Qt Toolkit.
7	**
8	** $QT_BEGIN_LICENSE:LGPL$
9	** Commercial License Usage
10	** Licensees holding valid commercial Qt licenses may use this file in
11	** accordance with the commercial license agreement provided with the
12	** Software or, alternatively, in accordance with the terms contained in
13	** a written agreement between you and The Qt Company. For licensing terms
14	** and conditions see https://www.qt.io/terms-conditions. For further
15	** information use the contact form at https://www.qt.io/contact-us.
16	**
17	** GNU Lesser General Public License Usage
18	** Alternatively, this file may be used under the terms of the GNU Lesser
19	** General Public License version 3 as published by the Free Software
20	** Foundation and appearing in the file LICENSE.LGPL3 included in the
21	** packaging of this file. Please review the following information to
22	** ensure the GNU Lesser General Public License version 3 requirements
23	** will be met: https://www.gnu.org/licenses/lgpl-3.0.html.
24	**
25	** GNU General Public License Usage
26	** Alternatively, this file may be used under the terms of the GNU
27	** General Public License version 2.0 or (at your option) the GNU General
28	** Public license version 3 or any later version approved by the KDE Free
29	** Qt Foundation. The licenses are as published by the Free Software
30	** Foundation and appearing in the file LICENSE.GPL2 and LICENSE.GPL3
31	** included in the packaging of this file. Please review the following
32	** information to ensure the GNU General Public License requirements will
33	** be met: https://www.gnu.org/licenses/gpl-2.0.html and
34	** https://www.gnu.org/licenses/gpl-3.0.html.
35	**
36	** $QT_END_LICENSE$
37	**
38	****************************************************************************/
39
40	#ifndef QT3DCORE_MATRIX4X4_SSE_P_H
41	#define QT3DCORE_MATRIX4X4_SSE_P_H
42
43	//
44	// W A R N I N G
45	// -------------
46	//
47	// This file is not part of the Qt3D API. It exists purely as an
48	// implementation detail. This header file may change from version to
49	// version without notice, or even be removed.
50	//
51	// We mean it.
52	//
53
54	#include <Qt3DCore/private/vector4d_p.h>
55	#include <Qt3DCore/private/vector3d_p.h>
56	#include <private/qsimd_p.h>
57	#include <QMatrix4x4>
58
59	#ifdef QT_COMPILER_SUPPORTS_SSE2
60
61	QT_BEGIN_NAMESPACE
62
63	namespace Qt3DCore {
64
65	class Matrix4x4_SSE
66	{
67	public:
68
69	Q_ALWAYS_INLINE Matrix4x4_SSE() { setToIdentity(); }
70	explicit Q_ALWAYS_INLINE Matrix4x4_SSE(Qt::Initialization) {}
71
72	// QMatrix4x4::constData returns in column major order
73	explicit Q_ALWAYS_INLINE Matrix4x4_SSE(const QMatrix4x4 &mat)
74	{
75	// data may not be properly aligned, using unaligned loads
76	const float *data = mat.constData();
77	m_col1 = _mm_loadu_ps(p: data);
78	m_col2 = _mm_loadu_ps(p: data + `4`);
79	m_col3 = _mm_loadu_ps(p: data + `8`);
80	m_col4 = _mm_loadu_ps(p: data + `12`);
81	}
82
83	// Assumes data is 16 bytes aligned (and in column major order)
84	explicit Q_ALWAYS_INLINE Matrix4x4_SSE(float *data)
85	{
86	m_col1 = _mm_load_ps(p: data);
87	m_col2 = _mm_load_ps(p: data + `4`);
88	m_col3 = _mm_load_ps(p: data + `8`);
89	m_col4 = _mm_load_ps(p: data + `12`);
90	}
91
92	// In (row major) but we store in column major order
93	explicit Q_ALWAYS_INLINE Matrix4x4_SSE(float m11, float m12, float m13, float m14,
94	float m21, float m22, float m23, float m24,
95	float m31, float m32, float m33, float m34,
96	float m41, float m42, float m43, float m44)
97	{
98	m_col1 = _mm_set_ps(z: m41, y: m31, x: m21, w: m11);
99	m_col2 = _mm_set_ps(z: m42, y: m32, x: m22, w: m12);
100	m_col3 = _mm_set_ps(z: m43, y: m33, x: m23, w: m13);
101	m_col4 = _mm_set_ps(z: m44, y: m34, x: m24, w: m14);
102	}
103
104	Q_ALWAYS_INLINE void setToIdentity()
105	{
106	m_col1 = _mm_set_ss(w: `1.0f`);
107	m_col2 = _mm_set_ps(z: `0.0f`, y: `0.0f`, x: `1.0f`, w: `0.0f`);
108	m_col3 = _mm_set_ps(z: `0.0f`, y: `1.0f`, x: `0.0f`, w: `0.0f`);
109	m_col4 = _mm_set_ps(z: `1.0f`, y: `0.0f`, x: `0.0f`, w: `0.0f`);
110	}
111
112	Q_ALWAYS_INLINE Matrix4x4_SSE operator(const* Matrix4x4_SSE &other) const
113	{
114	Matrix4x4_SSE c(Qt::Uninitialized);
115
116	const __m128 c1 = m_col1;
117	const __m128 c2 = m_col2;
118	const __m128 c3 = m_col3;
119	const __m128 c4 = m_col4;
120
121	// c11, c21, c31, c41
122	// 1) (m11 x n11), (m11 x n21), (m11 x n31), (m11 x n41)
123	// 2) (m11 x n11) + (m21 x n12), (m11 x n21) + (m21 x n22), (m11 x n31) + (m21 x n32), (m11 x n41) + (m21 x n42)
124	// 3) (m11 x n11) + (m21 x n21) + (m31 x n13), (m11 x n21) + (m21 x n22) + (m31 x n 23), (m11 x n31) + (m21 x n32) + (m31 x n33), (m11 x n41) + (m21 x n42) (m31 x n43)
125	// 4) (m11 x n11) + (m21 x n21) + (m31 x n13) + (m41 x n14), (m11 x n21) + (m21 x n22) + (m31 x n 23) + (m41 x n24), (m11 x n31) + (m21 x n32) + (m31 x n33) + (m41 x n34), (m11 x n41) + (m21 x n42) (m31 x n43) + (m41 x n44)
126	__m128 tmp = _mm_mul_ps(a: _mm_set1_ps(w: other.m11()), b: c1);
127	tmp = _mm_add_ps(a: _mm_mul_ps(a: _mm_set1_ps(w: other.m21()), b: c2), b: tmp);
128	tmp = _mm_add_ps(a: _mm_mul_ps(a: _mm_set1_ps(w: other.m31()), b: c3), b: tmp);
129	c.m_col1 = _mm_add_ps(a: _mm_mul_ps(a: _mm_set1_ps(w: other.m41()), b: c4), b: tmp);
130
131	// c21, c22, c23, c24
132	tmp = _mm_mul_ps(a: _mm_set1_ps(w: other.m12()), b: c1);
133	tmp = _mm_add_ps(a: _mm_mul_ps(a: _mm_set1_ps(w: other.m22()), b: c2), b: tmp);
134	tmp = _mm_add_ps(a: _mm_mul_ps(a: _mm_set1_ps(w: other.m32()), b: c3), b: tmp);
135	c.m_col2 = _mm_add_ps(a: _mm_mul_ps(a: _mm_set1_ps(w: other.m42()), b: c4), b: tmp);
136
137	// c31, c32, c33, c34
138	tmp = _mm_mul_ps(a: _mm_set1_ps(w: other.m13()), b: c1);
139	tmp = _mm_add_ps(a: _mm_mul_ps(a: _mm_set1_ps(w: other.m23()), b: c2), b: tmp);
140	tmp = _mm_add_ps(a: _mm_mul_ps(a: _mm_set1_ps(w: other.m33()), b: c3), b: tmp);
141	c.m_col3 = _mm_add_ps(a: _mm_mul_ps(a: _mm_set1_ps(w: other.m43()), b: c4), b: tmp);
142
143	// c41, c42, c43, c44
144	tmp = _mm_mul_ps(a: _mm_set1_ps(w: other.m14()), b: c1);
145	tmp = _mm_add_ps(a: _mm_mul_ps(a: _mm_set1_ps(w: other.m24()), b: c2), b: tmp);
146	tmp = _mm_add_ps(a: _mm_mul_ps(a: _mm_set1_ps(w: other.m34()), b: c3), b: tmp);
147	c.m_col4 = _mm_add_ps(a: _mm_mul_ps(a: _mm_set1_ps(w: other.m44()), b: c4), b: tmp);
148
149	return c;
150	}
151
152	Q_ALWAYS_INLINE Matrix4x4_SSE operator-(const Matrix4x4_SSE &other) const
153	{
154	Matrix4x4_SSE c(Qt::Uninitialized);
155
156	c.m_col1 = _mm_sub_ps(a: m_col1, b: other.m_col1);
157	c.m_col2 = _mm_sub_ps(a: m_col2, b: other.m_col2);
158	c.m_col3 = _mm_sub_ps(a: m_col3, b: other.m_col3);
159	c.m_col4 = _mm_sub_ps(a: m_col4, b: other.m_col4);
160
161	return c;
162	}
163
164	Q_ALWAYS_INLINE Matrix4x4_SSE operator+(const Matrix4x4_SSE &other) const
165	{
166	Matrix4x4_SSE c(Qt::Uninitialized);
167
168	c.m_col1 = _mm_add_ps(a: m_col1, b: other.m_col1);
169	c.m_col2 = _mm_add_ps(a: m_col2, b: other.m_col2);
170	c.m_col3 = _mm_add_ps(a: m_col3, b: other.m_col3);
171	c.m_col4 = _mm_add_ps(a: m_col4, b: other.m_col4);
172
173	return c;
174	}
175
176	Q_ALWAYS_INLINE Matrix4x4_SSE &operator=(const* Matrix4x4_SSE &other)
177	{
178	*this = *this * other;
179	return *this;
180	}
181
182	Q_ALWAYS_INLINE Matrix4x4_SSE &operator-=(const Matrix4x4_SSE &other)
183	{
184	*this = *this - other;
185	return *this;
186	}
187
188	Q_ALWAYS_INLINE Matrix4x4_SSE &operator+=(const Matrix4x4_SSE &other)
189	{
190	*this = *this + other;
191	return *this;
192	}
193
194	Q_ALWAYS_INLINE Matrix4x4_SSE transposed() const
195	{
196	Matrix4x4_SSE c(Qt::Uninitialized);
197
198	// ~113 instructions
199	// 0b11011101 == 0xdd
200	// 0b10001000 == 0x88
201	const __m128 tmp1 = _mm_shuffle_ps(m_col1, m_col2, `0xdd`);
202	const __m128 tmp2 = _mm_shuffle_ps(m_col1, m_col2, `0x88`);
203	const __m128 tmp3 = _mm_shuffle_ps(m_col3, m_col4, `0xdd`);
204	const __m128 tmp4 = _mm_shuffle_ps(m_col3, m_col4, `0x88`);
205	c.m_col1 = _mm_shuffle_ps(tmp2, tmp4, `0x88`);
206	c.m_col2 = _mm_shuffle_ps(tmp1, tmp3, `0x88`);
207	c.m_col3 = _mm_shuffle_ps(tmp2, tmp4, `0xdd`);
208	c.m_col4 = _mm_shuffle_ps(tmp1, tmp3, `0xdd`);
209
210	return c;
211	}
212
213	Q_ALWAYS_INLINE Matrix4x4_SSE inverted() const
214	{
215	// TO DO: Optimize
216	const QMatrix4x4 mat = toQMatrix4x4();
217	return Matrix4x4_SSE (mat.inverted());
218	}
219
220	Q_ALWAYS_INLINE bool operator==(const Matrix4x4_SSE &other) const
221	{
222	// 0b1111 == 0xf
223	return (_mm_movemask_ps(a: _mm_cmpeq_ps(a: m_col1, b: other.m_col1)) == `0xf` &&
224	_mm_movemask_ps(a: _mm_cmpeq_ps(a: m_col2, b: other.m_col2)) == `0xf` &&
225	_mm_movemask_ps(a: _mm_cmpeq_ps(a: m_col3, b: other.m_col3)) == `0xf` &&
226	_mm_movemask_ps(a: _mm_cmpeq_ps(a: m_col4, b: other.m_col4)) == `0xf`);
227	}
228
229	Q_ALWAYS_INLINE bool operator!=(const Matrix4x4_SSE &other) const
230	{
231	return !(*this == other);
232	}
233
234	Q_ALWAYS_INLINE float m11() const { return _mm_cvtss_f32(a: m_col1); }
235	Q_ALWAYS_INLINE float m12() const { return _mm_cvtss_f32(a: m_col2); }
236	Q_ALWAYS_INLINE float m13() const { return _mm_cvtss_f32(a: m_col3); }
237	Q_ALWAYS_INLINE float m14() const { return _mm_cvtss_f32(a: m_col4); }
238
239	Q_ALWAYS_INLINE float m21() const
240	{
241	// 0b01010101 = 0x55
242	return _mm_cvtss_f32(_mm_shuffle_ps(m_col1, m_col1, `0x55`));
243	}
244	Q_ALWAYS_INLINE float m22() const
245	{
246	// 0b01010101 = 0x55
247	return _mm_cvtss_f32(_mm_shuffle_ps(m_col2, m_col2, `0x55`));
248	}
249	Q_ALWAYS_INLINE float m23() const
250	{
251	// 0b01010101 = 0x55
252	return _mm_cvtss_f32(_mm_shuffle_ps(m_col3, m_col3, `0x55`));
253	}
254	Q_ALWAYS_INLINE float m24() const
255	{
256	// 0b01010101 = 0x55
257	return _mm_cvtss_f32(_mm_shuffle_ps(m_col4, m_col4, `0x55`));
258	}
259
260	Q_ALWAYS_INLINE float m31() const
261	{
262	// 0b10101010 = 0xaa
263	return _mm_cvtss_f32(_mm_shuffle_ps(m_col1, m_col1, `0xaa`));
264	}
265	Q_ALWAYS_INLINE float m32() const
266	{
267	// 0b10101010 = 0xaa
268	return _mm_cvtss_f32(_mm_shuffle_ps(m_col2, m_col2, `0xaa`));
269	}
270	Q_ALWAYS_INLINE float m33() const
271	{
272	// 0b10101010 = 0xaa
273	return _mm_cvtss_f32(_mm_shuffle_ps(m_col3, m_col3, `0xaa`));
274	}
275	Q_ALWAYS_INLINE float m34() const
276	{
277	// 0b10101010 = 0xaa
278	return _mm_cvtss_f32(_mm_shuffle_ps(m_col4, m_col4, `0xaa`));
279	}
280
281	Q_ALWAYS_INLINE float m41() const
282	{
283	// 0b11111111 = 0xff
284	return _mm_cvtss_f32(_mm_shuffle_ps(m_col1, m_col1, `0xff`));
285	}
286	Q_ALWAYS_INLINE float m42() const
287	{
288	// 0b11111111 = 0xff
289	return _mm_cvtss_f32(_mm_shuffle_ps(m_col2, m_col2, `0xff`));
290	}
291	Q_ALWAYS_INLINE float m43() const
292	{
293	// 0b11111111 = 0xff
294	return _mm_cvtss_f32(_mm_shuffle_ps(m_col3, m_col3, `0xff`));
295	}
296	Q_ALWAYS_INLINE float m44() const
297	{
298	// 0b11111111 = 0xff
299	return _mm_cvtss_f32(_mm_shuffle_ps(m_col4, m_col4, `0xff`));
300	}
301
302	Q_ALWAYS_INLINE Vector4D row(int index) const
303	{
304	switch (index) {
305	case `0`:
306	return Vector4D (m11(), m12(), m13(), m14());
307	case `1`:
308	return Vector4D (m21(), m22(), m23(), m24());
309	case `2`:
310	return Vector4D (m31(), m32(), m33(), m34());
311	case `3`:
312	return Vector4D (m41(), m42(), m43(), m44());
313	default:
314	Q_UNREACHABLE();
315	return Vector4D ();
316	}
317	}
318
319	Q_ALWAYS_INLINE Vector4D column(int index) const
320	{
321	Vector4D c(Qt::Uninitialized);
322	switch (index) {
323	case `0`:
324	c.m_xyzw = m_col1;
325	break;
326	case `1`:
327	c.m_xyzw = m_col2;
328	break;
329	case `2`:
330	c.m_xyzw = m_col3;
331	break;
332	case `3`:
333	c.m_xyzw = m_col4;
334	break;
335	default:
336	Q_UNREACHABLE();
337	return Vector4D ();
338	}
339	return c;
340	}
341
342	Q_ALWAYS_INLINE QMatrix4x4 toQMatrix4x4() const { return QMatrix4x4 (m11(), m12(), m13(), m14(),
343	m21(), m22(), m23(), m24(),
344	m31(), m32(), m33(), m34(),
345	m41(), m42(), m43(), m44()); }
346
347	Q_ALWAYS_INLINE Vector3D_SSE map(const Vector3D_SSE &point) const
348	{
349	return *this * point;
350	}
351
352	Q_ALWAYS_INLINE Vector4D_SSE map(const Vector4D_SSE &point) const
353	{
354	return *this * point;
355	}
356
357	Q_ALWAYS_INLINE Vector3D_SSE mapVector(const Vector3D_SSE &vector) const
358	{
359	const Vector3D_SSE row1(m11(), m12(), m13());
360	const Vector3D_SSE row2(m21(), m22(), m23());
361	const Vector3D_SSE row3(m31(), m32(), m33());
362
363	return Vector3D (Vector3D_SSE::dotProduct(a: row1, b: vector),
364	Vector3D_SSE::dotProduct(a: row2, b: vector),
365	Vector3D_SSE::dotProduct(a: row3, b: vector));
366	}
367
368	friend Q_ALWAYS_INLINE Vector4D operator(const* Vector4D &vector, const Matrix4x4_SSE &matrix);
369	friend Q_ALWAYS_INLINE Vector4D operator(const* Matrix4x4_SSE &matrix, const Vector4D &vector);
370
371	friend Q_ALWAYS_INLINE Vector3D operator(const* Vector3D &vector, const Matrix4x4_SSE &matrix);
372	friend Q_ALWAYS_INLINE Vector3D operator(const* Matrix4x4_SSE &matrix, const Vector3D &vector);
373
374	friend Q_3DCORE_PRIVATE_EXPORT QDebug operator<<(QDebug dbg, const Matrix4x4_SSE &m);
375
376	private:
377	// Internally we will store the matrix as indicated below
378	// Q_DECL_ALIGN(16) // aligned on 16 bytes boundary for SSE (column major)
379	// struct
380	// {
381	// float m_m11, m_m21, m_m31, m_m41;
382	// float m_m12, m_m22, m_m32, m_m42;
383	// float m_m13, m_m23, m_m33, m_m43;
384	// float m_m14, m_m24, m_m34, m_m44;
385	// };
386	// struct
387	// {
388	// float m[16];
389	// };
390	__m128 m_col1;
391	__m128 m_col2;
392	__m128 m_col3;
393	__m128 m_col4;
394	};
395
396	Q_ALWAYS_INLINE Vector4D operator(const* Vector4D &vector, const Matrix4x4_SSE &matrix)
397	{
398	const __m128 vCol1 = _mm_mul_ps(a: matrix.m_col1, b: vector.m_xyzw);
399	const __m128 vCol2 = _mm_mul_ps(a: matrix.m_col2, b: vector.m_xyzw);
400	const __m128 vCol3 = _mm_mul_ps(a: matrix.m_col3, b: vector.m_xyzw);
401	const __m128 vCol4 = _mm_mul_ps(a: matrix.m_col4, b: vector.m_xyzw);
402
403
404	// 0b01000100 == 0x44
405	// 0b11101110 == 0xee
406
407	// vCol1.x, vCol1.y, vCol2.x, vCol2.y
408	__m128 tmp1 = _mm_shuffle_ps(vCol1, vCol2, `0x44`);
409	// vCol1.z, vCol1.w, vCol2.z, vCol2.w
410	__m128 tmp2 = _mm_shuffle_ps(vCol1, vCol2, `0xee`);
411
412	// vCol1.x + vCol1.z, vCol1.y + vCol1.w, vCol2.x + vCol2.z, vCol2.y + vCol2.w,
413	const __m128 tmpSum01 = _mm_add_ps(a: tmp1, b: tmp2);
414
415	// vCol3.x, vCol3.y, vCol4.x, vCol4.y
416	tmp1 = _mm_shuffle_ps(vCol3, vCol4, `0x44`);
417	// vCol3.z, vCol3.w, vCol4.z, vCol4.w
418	tmp2 = _mm_shuffle_ps(vCol3, vCol4, `0xee`);
419
420	// vCol3.x + vCol3.z, vCol3.y + vCol3.w, vCol4.x + vCol4.z, vCol4.y + vCol4.w,
421	const __m128 tmpSum02 = _mm_add_ps(a: tmp1, b: tmp2);
422
423	// 0b10001000 == 0x88
424	// 0b11011101 == 0xdd
425
426	// vCol1.x + vCol1.z, vCol2.x + vCol2.z, vCol3.x + vCol3.z, vCol4.x + vCol4.z,
427	tmp1 = _mm_shuffle_ps(tmpSum01, tmpSum02, `0x88`);
428	// vCol1.y + vCol1.w, vCol2.y + vCol2.w, vCol3.y + vCol3.w, vCol4.y + vCol4.w,
429	tmp2 = _mm_shuffle_ps(tmpSum01, tmpSum02, `0xdd`);
430
431	Vector4D v(Qt::Uninitialized);
432	v.m_xyzw = _mm_add_ps(a: tmp1, b: tmp2);
433	return v;
434	}
435
436	Q_ALWAYS_INLINE Vector4D operator(const* Matrix4x4_SSE &matrix, const Vector4D &vector)
437	{
438	const Matrix4x4_SSE transposed = matrix.transposed();
439	return vector * transposed;
440	}
441
442	Q_ALWAYS_INLINE Vector3D operator(const* Vector3D &vector, const Matrix4x4_SSE &matrix)
443	{
444	const __m128 vec4 = _mm_set_ps(z: `1.0f`, y: vector.z(), x: vector.y(), w: vector.x());
445
446	const __m128 vCol1 = _mm_mul_ps(a: matrix.m_col1, b: vec4);
447	const __m128 vCol2 = _mm_mul_ps(a: matrix.m_col2, b: vec4);
448	const __m128 vCol3 = _mm_mul_ps(a: matrix.m_col3, b: vec4);
449	const __m128 vCol4 = _mm_mul_ps(a: matrix.m_col4, b: vec4);
450
451	// 0b01000100 == 0x44
452	// 0b11101110 == 0xee
453
454	// vCol1.x, vCol1.y, vCol2.x, vCol2.y
455	__m128 tmp1 = _mm_shuffle_ps(vCol1, vCol2, `0x44`);
456	// vCol1.z, vCol1.w, vCol2.z, vCol2.w
457	__m128 tmp2 = _mm_shuffle_ps(vCol1, vCol2, `0xee`);
458
459	// vCol1.x + vCol1.z, vCol1.y + vCol1.w, vCol2.x + vCol2.z, vCol2.y + vCol2.w,
460	const __m128 tmpSum01 = _mm_add_ps(a: tmp1, b: tmp2);
461
462	// vCol3.x, vCol3.y, vCol4.x, vCol4.y
463	tmp1 = _mm_shuffle_ps(vCol3, vCol4, `0x44`);
464	// vCol3.z, vCol3.w, vCol4.z, vCol4.w
465	tmp2 = _mm_shuffle_ps(vCol3, vCol4, `0xee`);
466
467	// vCol3.x + vCol3.z, vCol3.y + vCol3.w, vCol4.x + vCol4.z, vCol4.y + vCol4.w,
468	const __m128 tmpSum02 = _mm_add_ps(a: tmp1, b: tmp2);
469
470	// 0b10001000 == 0x88
471	// 0b11011101 == 0xdd
472
473	// vCol1.x + vCol1.z, vCol2.x + vCol2.z, vCol3.x + vCol3.z, vCol4.x + vCol4.z,
474	tmp1 = _mm_shuffle_ps(tmpSum01, tmpSum02, `0x88`);
475	// vCol1.y + vCol1.w, vCol2.y + vCol2.w, vCol3.y + vCol3.w, vCol4.y + vCol4.w,
476	tmp2 = _mm_shuffle_ps(tmpSum01, tmpSum02, `0xdd`);
477
478	const __m128 result = _mm_add_ps(a: tmp1, b: tmp2);
479	// 0b11111111 = 0xff
480	const __m128 divisor = _mm_shuffle_ps(result, result, `0xff`);
481	Vector3D v(Qt::Uninitialized);
482	v.m_xyzw = _mm_div_ps(a: result, b: divisor);
483	return v;
484	}
485
486	Q_ALWAYS_INLINE Vector3D operator(const* Matrix4x4_SSE &matrix, const Vector3D &vector)
487	{
488	const Matrix4x4_SSE transposed = matrix.transposed();
489	return vector * transposed;
490	}
491
492	} // Qt3DCore
493
494
495	Q_DECLARE_TYPEINFO(Qt3DCore::Matrix4x4_SSE, Q_PRIMITIVE_TYPE);
496
497	QT_END_NAMESPACE
498
499	Q_DECLARE_METATYPE(Qt3DCore::Matrix4x4_SSE)
500
501	#endif // QT_COMPILER_SUPPORTS_SSE2
502
503	#endif // QT3DCORE_MATRIX4X4_SSE_P_H
504

source code of qt3d/src/core/transforms/matrix4x4_sse_p.h