graphene-simd4f.h source code [gtk/subprojects/graphene/include/graphene-simd4f.h]

1	/* graphene-simd4f.h: SIMD wrappers and operations
2	*
3	* SPDX-License-Identifier: MIT
4	*
5	* Copyright 2014 Emmanuele Bassi
6	*
7	* Permission is hereby granted, free of charge, to any person obtaining a copy
8	* of this software and associated documentation files (the "Software"), to deal
9	* in the Software without restriction, including without limitation the rights
10	* to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
11	* copies of the Software, and to permit persons to whom the Software is
12	* furnished to do so, subject to the following conditions:
13	*
14	* The above copyright notice and this permission notice shall be included in
15	* all copies or substantial portions of the Software.
16	*
17	* THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
18	* IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
19	* FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
20	* AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
21	* LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
22	* OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN
23	* THE SOFTWARE.
24	*/
25
26	#pragma once
27
28	#if !defined(GRAPHENE_H_INSIDE) && !defined(GRAPHENE_COMPILATION)
29	#error "Only graphene.h can be included directly."
30	#endif
31
32	/* needed for memcpy() */
33	#include <string.h>
34	#include <math.h>
35	#include <float.h>
36
37	#include "graphene-config.h"
38	#include "graphene-macros.h"
39	#include "graphene-version-macros.h"
40
41	GRAPHENE_BEGIN_DECLS
42
43	/* Platform specific operations */
44
45	GRAPHENE_AVAILABLE_IN_1_0
46	graphene_simd4f_t graphene_simd4f_init (float x,
47	float y,
48	float z,
49	float w);
50	GRAPHENE_AVAILABLE_IN_1_0
51	graphene_simd4f_t graphene_simd4f_init_zero (void);
52	GRAPHENE_AVAILABLE_IN_1_0
53	graphene_simd4f_t graphene_simd4f_init_4f (const float *v);
54	GRAPHENE_AVAILABLE_IN_1_0
55	graphene_simd4f_t graphene_simd4f_init_3f (const float *v);
56	GRAPHENE_AVAILABLE_IN_1_0
57	graphene_simd4f_t graphene_simd4f_init_2f (const float *v);
58
59	GRAPHENE_AVAILABLE_IN_1_0
60	void graphene_simd4f_dup_4f (const graphene_simd4f_t s,
61	float *v);
62	GRAPHENE_AVAILABLE_IN_1_0
63	void graphene_simd4f_dup_3f (const graphene_simd4f_t s,
64	float *v);
65	GRAPHENE_AVAILABLE_IN_1_0
66	void graphene_simd4f_dup_2f (const graphene_simd4f_t s,
67	float *v);
68
69	GRAPHENE_AVAILABLE_IN_1_2
70	float graphene_simd4f_get (const graphene_simd4f_t s,
71	unsigned int i);
72	GRAPHENE_AVAILABLE_IN_1_0
73	float graphene_simd4f_get_x (const graphene_simd4f_t s);
74	GRAPHENE_AVAILABLE_IN_1_0
75	float graphene_simd4f_get_y (const graphene_simd4f_t s);
76	GRAPHENE_AVAILABLE_IN_1_0
77	float graphene_simd4f_get_z (const graphene_simd4f_t s);
78	GRAPHENE_AVAILABLE_IN_1_0
79	float graphene_simd4f_get_w (const graphene_simd4f_t s);
80
81	GRAPHENE_AVAILABLE_IN_1_0
82	graphene_simd4f_t graphene_simd4f_splat (float v);
83	GRAPHENE_AVAILABLE_IN_1_0
84	graphene_simd4f_t graphene_simd4f_splat_x (const graphene_simd4f_t s);
85	GRAPHENE_AVAILABLE_IN_1_0
86	graphene_simd4f_t graphene_simd4f_splat_y (const graphene_simd4f_t s);
87	GRAPHENE_AVAILABLE_IN_1_0
88	graphene_simd4f_t graphene_simd4f_splat_z (const graphene_simd4f_t s);
89	GRAPHENE_AVAILABLE_IN_1_0
90	graphene_simd4f_t graphene_simd4f_splat_w (const graphene_simd4f_t s);
91
92	GRAPHENE_AVAILABLE_IN_1_0
93	graphene_simd4f_t graphene_simd4f_add (const graphene_simd4f_t a,
94	const graphene_simd4f_t b);
95	GRAPHENE_AVAILABLE_IN_1_0
96	graphene_simd4f_t graphene_simd4f_sub (const graphene_simd4f_t a,
97	const graphene_simd4f_t b);
98	GRAPHENE_AVAILABLE_IN_1_0
99	graphene_simd4f_t graphene_simd4f_mul (const graphene_simd4f_t a,
100	const graphene_simd4f_t b);
101	GRAPHENE_AVAILABLE_IN_1_0
102	graphene_simd4f_t graphene_simd4f_div (const graphene_simd4f_t a,
103	const graphene_simd4f_t b);
104
105	GRAPHENE_AVAILABLE_IN_1_0
106	graphene_simd4f_t graphene_simd4f_sqrt (const graphene_simd4f_t s);
107	GRAPHENE_AVAILABLE_IN_1_0
108	graphene_simd4f_t graphene_simd4f_reciprocal (const graphene_simd4f_t s);
109	GRAPHENE_AVAILABLE_IN_1_0
110	graphene_simd4f_t graphene_simd4f_rsqrt (const graphene_simd4f_t s);
111
112	GRAPHENE_AVAILABLE_IN_1_0
113	graphene_simd4f_t graphene_simd4f_cross3 (const graphene_simd4f_t a,
114	const graphene_simd4f_t b);
115	GRAPHENE_AVAILABLE_IN_1_0
116	graphene_simd4f_t graphene_simd4f_dot3 (const graphene_simd4f_t a,
117	const graphene_simd4f_t b);
118	GRAPHENE_AVAILABLE_IN_1_4
119	float graphene_simd4f_dot3_scalar (const graphene_simd4f_t a,
120	const graphene_simd4f_t b);
121
122	GRAPHENE_AVAILABLE_IN_1_0
123	graphene_simd4f_t graphene_simd4f_min (const graphene_simd4f_t a,
124	const graphene_simd4f_t b);
125	GRAPHENE_AVAILABLE_IN_1_0
126	graphene_simd4f_t graphene_simd4f_max (const graphene_simd4f_t a,
127	const graphene_simd4f_t b);
128
129	GRAPHENE_AVAILABLE_IN_1_0
130	graphene_simd4f_t graphene_simd4f_shuffle_wxyz (const graphene_simd4f_t s);
131	GRAPHENE_AVAILABLE_IN_1_0
132	graphene_simd4f_t graphene_simd4f_shuffle_zwxy (const graphene_simd4f_t s);
133	GRAPHENE_AVAILABLE_IN_1_0
134	graphene_simd4f_t graphene_simd4f_shuffle_yzwx (const graphene_simd4f_t s);
135
136	GRAPHENE_AVAILABLE_IN_1_0
137	graphene_simd4f_t graphene_simd4f_zero_w (const graphene_simd4f_t s);
138	GRAPHENE_AVAILABLE_IN_1_0
139	graphene_simd4f_t graphene_simd4f_zero_zw (const graphene_simd4f_t s);
140
141	GRAPHENE_AVAILABLE_IN_1_0
142	graphene_simd4f_t graphene_simd4f_merge_high (const graphene_simd4f_t a,
143	const graphene_simd4f_t b);
144	GRAPHENE_AVAILABLE_IN_1_0
145	graphene_simd4f_t graphene_simd4f_merge_low (const graphene_simd4f_t a,
146	const graphene_simd4f_t b);
147	GRAPHENE_AVAILABLE_IN_1_0
148	graphene_simd4f_t graphene_simd4f_merge_w (const graphene_simd4f_t s,
149	float v);
150
151	GRAPHENE_AVAILABLE_IN_1_0
152	graphene_simd4f_t graphene_simd4f_flip_sign_0101 (const graphene_simd4f_t s);
153	GRAPHENE_AVAILABLE_IN_1_0
154	graphene_simd4f_t graphene_simd4f_flip_sign_1010 (const graphene_simd4f_t s);
155
156	GRAPHENE_AVAILABLE_IN_1_0
157	bool graphene_simd4f_cmp_eq (const graphene_simd4f_t a,
158	const graphene_simd4f_t b);
159	GRAPHENE_AVAILABLE_IN_1_0
160	bool graphene_simd4f_cmp_neq (const graphene_simd4f_t a,
161	const graphene_simd4f_t b);
162	GRAPHENE_AVAILABLE_IN_1_2
163	bool graphene_simd4f_cmp_lt (const graphene_simd4f_t a,
164	const graphene_simd4f_t b);
165	GRAPHENE_AVAILABLE_IN_1_2
166	bool graphene_simd4f_cmp_le (const graphene_simd4f_t a,
167	const graphene_simd4f_t b);
168	GRAPHENE_AVAILABLE_IN_1_2
169	bool graphene_simd4f_cmp_ge (const graphene_simd4f_t a,
170	const graphene_simd4f_t b);
171	GRAPHENE_AVAILABLE_IN_1_2
172	bool graphene_simd4f_cmp_gt (const graphene_simd4f_t a,
173	const graphene_simd4f_t b);
174	GRAPHENE_AVAILABLE_IN_1_0
175	graphene_simd4f_t graphene_simd4f_neg (const graphene_simd4f_t s);
176
177	#if !defined(__GI_SCANNER__) && defined(GRAPHENE_USE_SSE)
178
179	/* SSE2 implementation of SIMD 4f */
180
181	/ Union type used to do single lane reading without memcpy /
182	typedef union {
183	graphene_simd4f_t s;
184	float f[`4`];
185	} graphene_simd4f_union_t;
186
187	/* On GCC, we use __extension__ macros to avoid a static inline */
188	# if defined(__GNUC__)
189
190	/* Use GCC statement __extension__ to inline all these functions */
191
/* Builds a vector whose lanes 0..3 hold x, y, z and w. */
# define graphene_simd4f_init(x,y,z,w) \
  (__extension__ ({ \
    (graphene_simd4f_t) { (x), (y), (z), (w) }; \
  }))

/* Returns a vector with every lane set to zero. */
# define graphene_simd4f_init_zero() \
  (__extension__ ({ \
    (graphene_simd4f_t) _mm_setzero_ps(); \
  }))

/* Loads four consecutive floats starting at v (unaligned load). */
# define graphene_simd4f_init_4f(v) \
  (__extension__ ({ \
    (graphene_simd4f_t) _mm_loadu_ps (v); \
  }))

/* Loads v[0], v[1], v[2] into lanes 0..2 and sets lane 3 to zero.
 * The pointer is captured once, so an argument with side effects
 * (e.g. `p++`) is evaluated a single time.
 */
# define graphene_simd4f_init_3f(v) \
  (__extension__ ({ \
    const float *__gs4f_init_3f_v = (v); \
    (graphene_simd4f_t) { \
      __gs4f_init_3f_v[0], __gs4f_init_3f_v[1], __gs4f_init_3f_v[2], 0.f \
    }; \
  }))

/* Loads v[0], v[1] into lanes 0..1 and sets lanes 2..3 to zero.
 * The pointer is captured once, as in graphene_simd4f_init_3f().
 */
# define graphene_simd4f_init_2f(v) \
  (__extension__ ({ \
    const float *__gs4f_init_2f_v = (v); \
    (graphene_simd4f_t) { __gs4f_init_2f_v[0], __gs4f_init_2f_v[1], 0.f, 0.f }; \
  }))
216
/* Stores all four lanes of s into v[0..3] (unaligned store). */
# define graphene_simd4f_dup_4f(s,v) \
  (__extension__ ({ \
    _mm_storeu_ps ((v), (s)); \
  }))

/* Copies lanes 0..2 of s into v[0..2].
 * s is copied into a local first, so it may be any expression (not only
 * an lvalue) and is evaluated exactly once.
 */
# define graphene_simd4f_dup_3f(s,v) \
  (__extension__ ({ \
    const graphene_simd4f_t __gs4f_dup_3f_s = (s); \
    memcpy ((v), &__gs4f_dup_3f_s, sizeof (float) * 3); \
  }))

/* Copies lanes 0..1 of s into v[0..1]; s may be any expression. */
# define graphene_simd4f_dup_2f(s,v) \
  (__extension__ ({ \
    const graphene_simd4f_t __gs4f_dup_2f_s = (s); \
    memcpy ((v), &__gs4f_dup_2f_s, sizeof (float) * 2); \
  }))
231
232	# define graphene_simd4f_get(s,i) \
233	(__extension__ ({ \
234	graphene_simd4f_union_t __u = { (s) }; \
235	(float) __u.f[(i)]; \
236	}))
237
238	# define graphene_simd4f_get_x(s) graphene_simd4f_get (s, 0)
239	# define graphene_simd4f_get_y(s) graphene_simd4f_get (s, 1)
240	# define graphene_simd4f_get_z(s) graphene_simd4f_get (s, 2)
241	# define graphene_simd4f_get_w(s) graphene_simd4f_get (s, 3)
242
/* Broadcasts the scalar v to all four lanes. */
# define graphene_simd4f_splat(v) \
  ((graphene_simd4f_t) _mm_set1_ps ((v)))

/* Broadcast lane 0 (x) of the vector v to all four lanes. */
# define graphene_simd4f_splat_x(v) \
  ((graphene_simd4f_t) _mm_shuffle_ps ((v), (v), _MM_SHUFFLE (0, 0, 0, 0)))

/* Broadcast lane 1 (y) of the vector v to all four lanes. */
# define graphene_simd4f_splat_y(v) \
  ((graphene_simd4f_t) _mm_shuffle_ps ((v), (v), _MM_SHUFFLE (1, 1, 1, 1)))

/* Broadcast lane 2 (z) of the vector v to all four lanes. */
# define graphene_simd4f_splat_z(v) \
  ((graphene_simd4f_t) _mm_shuffle_ps ((v), (v), _MM_SHUFFLE (2, 2, 2, 2)))

/* Broadcast lane 3 (w) of the vector v to all four lanes. */
# define graphene_simd4f_splat_w(v) \
  ((graphene_simd4f_t) _mm_shuffle_ps ((v), (v), _MM_SHUFFLE (3, 3, 3, 3)))
267
/* Lane-wise a + b. */
# define graphene_simd4f_add(a,b) \
  ((graphene_simd4f_t) _mm_add_ps ((a), (b)))

/* Lane-wise a - b. */
# define graphene_simd4f_sub(a,b) \
  ((graphene_simd4f_t) _mm_sub_ps ((a), (b)))

/* Lane-wise a * b. */
# define graphene_simd4f_mul(a,b) \
  ((graphene_simd4f_t) _mm_mul_ps ((a), (b)))

/* Lane-wise a / b. */
# define graphene_simd4f_div(a,b) \
  ((graphene_simd4f_t) _mm_div_ps ((a), (b)))

/* Lane-wise square root of v. */
# define graphene_simd4f_sqrt(v) \
  ((graphene_simd4f_t) _mm_sqrt_ps ((v)))
292
/* Lane-wise 1 / v: the hardware estimate from _mm_rcp_ps() refined with one
 * Newton-Raphson step, est * (2 - v * est).
 *
 * For a lane where v is zero the estimate is infinite, and the refinement
 * would compute 0 * inf = NaN. The v * est term is therefore forced to zero
 * in those lanes, so the result stays infinite instead of becoming NaN.
 * The argument is evaluated exactly once.
 */
# define graphene_simd4f_reciprocal(v) \
  (__extension__ ({ \
    const graphene_simd4f_t __gs4f_rcp_v = (v); \
    const graphene_simd4f_t __gs4f_rcp_two = _mm_set1_ps (2.0f); \
    const graphene_simd4f_t __gs4f_rcp_est = _mm_rcp_ps (__gs4f_rcp_v); \
    const graphene_simd4f_t __gs4f_rcp_is_zero = \
      _mm_cmpeq_ps (__gs4f_rcp_v, _mm_setzero_ps ()); \
    const graphene_simd4f_t __gs4f_rcp_ve = \
      _mm_mul_ps (__gs4f_rcp_v, _mm_andnot_ps (__gs4f_rcp_is_zero, __gs4f_rcp_est)); \
    (graphene_simd4f_t) _mm_mul_ps (__gs4f_rcp_est, \
                                    _mm_sub_ps (__gs4f_rcp_two, __gs4f_rcp_ve)); \
  }))
299
/* Lane-wise 1 / sqrt (v): the hardware estimate from _mm_rsqrt_ps() refined
 * with one Newton-Raphson step, 0.5 * est * (3 - est * (v * est)).
 *
 * For a lane where v is zero the estimate is infinite and est * (v * est)
 * would be NaN; that term is forced to zero in those lanes so the result
 * stays infinite instead of becoming NaN.
 * The argument is evaluated exactly once.
 */
# define graphene_simd4f_rsqrt(v) \
  (__extension__ ({ \
    const graphene_simd4f_t __gs4f_rsqrt_v = (v); \
    const graphene_simd4f_t __gs4f_rsqrt_half = _mm_set1_ps (0.5f); \
    const graphene_simd4f_t __gs4f_rsqrt_three = _mm_set1_ps (3.0f); \
    const graphene_simd4f_t __gs4f_rsqrt_est = _mm_rsqrt_ps (__gs4f_rsqrt_v); \
    const graphene_simd4f_t __gs4f_rsqrt_is_zero = \
      _mm_cmpeq_ps (__gs4f_rsqrt_v, _mm_setzero_ps ()); \
    const graphene_simd4f_t __gs4f_rsqrt_t = \
      _mm_andnot_ps (__gs4f_rsqrt_is_zero, \
                     _mm_mul_ps (__gs4f_rsqrt_est, \
                                 _mm_mul_ps (__gs4f_rsqrt_v, __gs4f_rsqrt_est))); \
    (graphene_simd4f_t) _mm_mul_ps (_mm_mul_ps (__gs4f_rsqrt_est, __gs4f_rsqrt_half), \
                                    _mm_sub_ps (__gs4f_rsqrt_three, __gs4f_rsqrt_t)); \
  }))
309
/* Cross product of the x, y, z lanes of a and b, computed as
 * a.yzx * b.zxy - a.zxy * b.yzx. Lane 3 of the result is a.w * b.w - a.w * b.w.
 * Both arguments are captured once, so each is evaluated a single time.
 */
# define graphene_simd4f_cross3(a,b) \
  (__extension__ ({ \
    const graphene_simd4f_t __gs4f_cross3_a = (a); \
    const graphene_simd4f_t __gs4f_cross3_b = (b); \
    const graphene_simd4f_t __gs4f_cross3_a_yzx = \
      _mm_shuffle_ps (__gs4f_cross3_a, __gs4f_cross3_a, _MM_SHUFFLE (3, 0, 2, 1)); \
    const graphene_simd4f_t __gs4f_cross3_a_zxy = \
      _mm_shuffle_ps (__gs4f_cross3_a, __gs4f_cross3_a, _MM_SHUFFLE (3, 1, 0, 2)); \
    const graphene_simd4f_t __gs4f_cross3_b_yzx = \
      _mm_shuffle_ps (__gs4f_cross3_b, __gs4f_cross3_b, _MM_SHUFFLE (3, 0, 2, 1)); \
    const graphene_simd4f_t __gs4f_cross3_b_zxy = \
      _mm_shuffle_ps (__gs4f_cross3_b, __gs4f_cross3_b, _MM_SHUFFLE (3, 1, 0, 2)); \
    (graphene_simd4f_t) _mm_sub_ps (_mm_mul_ps (__gs4f_cross3_a_yzx, __gs4f_cross3_b_zxy), \
                                    _mm_mul_ps (__gs4f_cross3_a_zxy, __gs4f_cross3_b_yzx)); \
  }))
318
/* Dot product of the x, y, z lanes of a and b, broadcast to all four lanes. */
# if defined(GRAPHENE_USE_SSE4_1)
/* SSE4.1: a single dot-product instruction; the 0x7f immediate is passed
 * through unchanged. */
#  define graphene_simd4f_dot3(a,b) \
  ((graphene_simd4f_t) _mm_dp_ps ((a), (b), 0x7f))
# else
/* SSE2 fallback: multiply, clear lane 3 with a bit mask, add the remaining
 * lanes horizontally into lane 0, then splat lane 0. */
#  define graphene_simd4f_dot3(a,b) \
  (__extension__ ({ \
    const unsigned int __gs4f_dot3_bits[] GRAPHENE_ALIGN16 = { 0xffffffff, 0xffffffff, 0xffffffff, 0 }; \
    const graphene_simd4f_t __gs4f_dot3_xyz = \
      _mm_and_ps (_mm_mul_ps ((a), (b)), \
                  _mm_load_ps ((const float *) __gs4f_dot3_bits)); \
    const graphene_simd4f_t __gs4f_dot3_pair = \
      _mm_add_ps (__gs4f_dot3_xyz, _mm_movehl_ps (__gs4f_dot3_xyz, __gs4f_dot3_xyz)); \
    const graphene_simd4f_t __gs4f_dot3_sum = \
      _mm_add_ss (__gs4f_dot3_pair, _mm_shuffle_ps (__gs4f_dot3_pair, __gs4f_dot3_pair, 1)); \
    (graphene_simd4f_t) _mm_shuffle_ps (__gs4f_dot3_sum, __gs4f_dot3_sum, 0); \
  }))
# endif
336
/* Same as graphene_simd4f_dot3(), but returns lane 0 of the result as a
 * plain float. */
# define graphene_simd4f_dot3_scalar(a,b) \
  ((float) _mm_cvtss_f32 (graphene_simd4f_dot3 (a, b)))
343
/* Lane-wise minimum of a and b. */
# define graphene_simd4f_min(a,b) \
  ((graphene_simd4f_t) _mm_min_ps ((a), (b)))

/* Lane-wise maximum of a and b. */
# define graphene_simd4f_max(a,b) \
  ((graphene_simd4f_t) _mm_max_ps ((a), (b)))
353
/* Lane rotations: the suffix names the source lanes that end up in
 * lanes 0, 1, 2, 3 of the result. */

/* { x, y, z, w } -> { w, x, y, z } */
# define graphene_simd4f_shuffle_wxyz(v) \
  ((graphene_simd4f_t) _mm_shuffle_ps ((v), (v), _MM_SHUFFLE (2, 1, 0, 3)))

/* { x, y, z, w } -> { z, w, x, y } */
# define graphene_simd4f_shuffle_zwxy(v) \
  ((graphene_simd4f_t) _mm_shuffle_ps ((v), (v), _MM_SHUFFLE (1, 0, 3, 2)))

/* { x, y, z, w } -> { y, z, w, x } */
# define graphene_simd4f_shuffle_yzwx(v) \
  ((graphene_simd4f_t) _mm_shuffle_ps ((v), (v), _MM_SHUFFLE (0, 3, 2, 1)))
368
/* Returns { x, y, z, 0 }. The argument is captured once so it is evaluated
 * a single time. */
# define graphene_simd4f_zero_w(v) \
  (__extension__ ({ \
    const graphene_simd4f_t __gs4f_zero_w_v = (v); \
    (graphene_simd4f_t) _mm_movelh_ps (__gs4f_zero_w_v, \
                                       _mm_unpackhi_ps (__gs4f_zero_w_v, _mm_setzero_ps ())); \
  }))

/* Returns { x, y, 0, 0 }. */
# define graphene_simd4f_zero_zw(v) \
  (__extension__ ({ \
    (graphene_simd4f_t) _mm_movelh_ps ((v), _mm_setzero_ps ()); \
  }))

/* Returns { s.x, s.y, s.z, v }: lane 3 of s replaced by the scalar v.
 * s is captured once so it is evaluated a single time. */
# define graphene_simd4f_merge_w(s,v) \
  (__extension__ ({ \
    const graphene_simd4f_t __gs4f_merge_w_s = (s); \
    (graphene_simd4f_t) _mm_movelh_ps (__gs4f_merge_w_s, \
                                       _mm_unpackhi_ps (__gs4f_merge_w_s, _mm_set1_ps ((v)))); \
  }))

/* Returns { a.z, a.w, b.z, b.w }. */
# define graphene_simd4f_merge_high(a,b) \
  (__extension__ ({ \
    (graphene_simd4f_t) _mm_movehl_ps ((b), (a)); \
  }))

/* Returns { a.x, a.y, b.x, b.y }. */
# define graphene_simd4f_merge_low(a,b) \
  (__extension__ ({ \
    (graphene_simd4f_t) _mm_movelh_ps ((a), (b)); \
  }))
395
396	typedef GRAPHENE_ALIGN16 union {
397	unsigned int ui[`4`];
398	float f[`4`];
399	} graphene_simd4f_uif_t;
400
401	# define graphene_simd4f_flip_sign_0101(v) \
402	(__extension__ ({ \
403	const graphene_simd4f_uif_t __pnpn = { { \
404	0x00000000, \
405	0x80000000, \
406	0x00000000, \
407	0x80000000 \
408	} }; \
409	(graphene_simd4f_t) _mm_xor_ps ((v), _mm_load_ps (__pnpn.f)); \
410	}))
411
412	# define graphene_simd4f_flip_sign_1010(v) \
413	(__extension__ ({ \
414	const graphene_simd4f_uif_t __npnp = { { \
415	0x80000000, \
416	0x00000000, \
417	0x80000000, \
418	0x00000000, \
419	} }; \
420	(graphene_simd4f_t) _mm_xor_ps ((v), _mm_load_ps (__npnp.f)); \
421	}))
422
/* Whole-vector comparisons. Each one runs a lane-wise SSE compare and then
 * inspects the byte mask of the result with _mm_movemask_epi8(). */

/* true when no lane of a differs from the matching lane of b. */
# define graphene_simd4f_cmp_eq(a,b) \
  ((bool) (_mm_movemask_epi8 ((__m128i) _mm_cmpneq_ps ((a), (b))) == 0))

/* true when at least one lane of a differs from the matching lane of b. */
# define graphene_simd4f_cmp_neq(a,b) \
  ((bool) (_mm_movemask_epi8 ((__m128i) _mm_cmpneq_ps ((a), (b))) != 0))

/* true when every lane of a is < the matching lane of b. */
# define graphene_simd4f_cmp_lt(a,b) \
  ((bool) (_mm_movemask_epi8 ((__m128i) _mm_cmplt_ps ((a), (b))) == 0xffff))

/* true when every lane of a is <= the matching lane of b. */
# define graphene_simd4f_cmp_le(a,b) \
  ((bool) (_mm_movemask_epi8 ((__m128i) _mm_cmple_ps ((a), (b))) == 0xffff))

/* true when every lane of a is >= the matching lane of b. */
# define graphene_simd4f_cmp_ge(a,b) \
  ((bool) (_mm_movemask_epi8 ((__m128i) _mm_cmpge_ps ((a), (b))) == 0xffff))

/* true when every lane of a is > the matching lane of b. */
# define graphene_simd4f_cmp_gt(a,b) \
  ((bool) (_mm_movemask_epi8 ((__m128i) _mm_cmpgt_ps ((a), (b))) == 0xffff))
458
/* Negates every lane of s by XOR-ing it with a mask that has only the
 * sign bit (0x80000000) set in each lane. */
# define graphene_simd4f_neg(s) \
  (__extension__ ({ \
    const graphene_simd4f_uif_t __gs4f_neg_sign = { \
      { 0x80000000, 0x80000000, 0x80000000, 0x80000000 } \
    }; \
    (graphene_simd4f_t) _mm_xor_ps ((s), _mm_load_ps (__gs4f_neg_sign.f)); \
  }))
469
470	/* On MSVC, we use static inlines */
471	# elif defined (_MSC_VER) /* Visual Studio SSE intrinsics */
472
473	/* Use static inline to inline all these functions */
474
475	#define graphene_simd4f_init(x,y,z,w) _simd4f_init(x,y,z,w)
476
477	static inline graphene_simd4f_t
478	_simd4f_init (float x, float y, float z, float w)
479	{
480	graphene_simd4f_t __s = { x, y, z, w };
481	return __s;
482	}
483
/* Returns a vector with every lane set to zero. */
#define graphene_simd4f_init_zero() \
  _mm_setzero_ps()

/* Loads four consecutive floats starting at v (unaligned load). */
#define graphene_simd4f_init_4f(v) \
  _mm_loadu_ps((v))

/* Loads v[0..2] into lanes 0..2 and zeroes lane 3.
 * v is parenthesised so pointer expressions such as `p + 1` index correctly. */
#define graphene_simd4f_init_3f(v) \
  graphene_simd4f_init ((v)[0], (v)[1], (v)[2], 0.f)

/* Loads v[0..1] into lanes 0..1 and zeroes lanes 2..3. */
#define graphene_simd4f_init_2f(v) \
  graphene_simd4f_init ((v)[0], (v)[1], 0.f, 0.f)

/* Stores all four lanes of s into v[0..3] (unaligned store). */
#define graphene_simd4f_dup_4f(s,v) \
  _mm_storeu_ps ((v), (s))

/* Copies lanes 0..2 of s into v[0..2]; s must be an lvalue because its
 * address is taken. */
#define graphene_simd4f_dup_3f(s,v) \
  memcpy ((v), &(s), sizeof (float) * 3)

/* Copies lanes 0..1 of s into v[0..1]; s must be an lvalue because its
 * address is taken. */
#define graphene_simd4f_dup_2f(s,v) \
  memcpy ((v), &(s), sizeof (float) * 2)
504
505	#define graphene_simd4f_get(s,i) _simd4f_get_xyzw(s, i)
506	#define graphene_simd4f_get_x(s) _simd4f_get_xyzw(s, 0)
507	#define graphene_simd4f_get_y(s) _simd4f_get_xyzw(s, 1)
508	#define graphene_simd4f_get_z(s) _simd4f_get_xyzw(s, 2)
509	#define graphene_simd4f_get_w(s) _simd4f_get_xyzw(s, 3)
510
511	static inline float
512	_simd4f_get_xyzw (graphene_simd4f_t s, int mode)
513	{
514	/ mode: get_x=0*
515	get_y=1
516	get_z=2
517	get_w=3 /*
518
519	graphene_simd4f_union_t u;
520	u.s = s;
521	return u.f[mode];
522	}
523
/* Broadcasts the scalar v to all four lanes. */
#define graphene_simd4f_splat(v) \
  (_mm_set1_ps ((v)))

/* Broadcast lane 0, 1, 2 or 3 of the vector v to all four lanes. */
#define graphene_simd4f_splat_x(v) \
  (_mm_shuffle_ps ((v), (v), _MM_SHUFFLE (0, 0, 0, 0)))

#define graphene_simd4f_splat_y(v) \
  (_mm_shuffle_ps ((v), (v), _MM_SHUFFLE (1, 1, 1, 1)))

#define graphene_simd4f_splat_z(v) \
  (_mm_shuffle_ps ((v), (v), _MM_SHUFFLE (2, 2, 2, 2)))

#define graphene_simd4f_splat_w(v) \
  (_mm_shuffle_ps ((v), (v), _MM_SHUFFLE (3, 3, 3, 3)))

/* Lane-wise arithmetic: a + b, a - b, a * b, a / b. */
#define graphene_simd4f_add(a,b) \
  (_mm_add_ps ((a), (b)))

#define graphene_simd4f_sub(a,b) \
  (_mm_sub_ps ((a), (b)))

#define graphene_simd4f_mul(a,b) \
  (_mm_mul_ps ((a), (b)))

#define graphene_simd4f_div(a,b) \
  (_mm_div_ps ((a), (b)))

/* Lane-wise square root of v. */
#define graphene_simd4f_sqrt(v) \
  (_mm_sqrt_ps ((v)))
553
554	#define graphene_simd4f_reciprocal(v) _simd4f_reciprocal(v)
555
556	static inline graphene_simd4f_t
557	_simd4f_reciprocal(const graphene_simd4f_t v)
558	{
559	const graphene_simd4f_t __two = graphene_simd4f_init (`2.0f`, `2.0f`, `2.0f`, `2.0f`);
560	graphene_simd4f_t __s = _mm_rcp_ps (v);
561	return graphene_simd4f_mul (__s,
562	graphene_simd4f_sub (__two,
563	graphene_simd4f_mul (v, __s)));
564	}
565
566	#define graphene_simd4f_rsqrt(v) _simd4f_rsqrt(v)
567
568	static inline graphene_simd4f_t
569	_simd4f_rsqrt(const graphene_simd4f_t v)
570	{
571	const graphene_simd4f_t __half = graphene_simd4f_init (`0.5f`, `0.5f`, `0.5f`, `0.5f`);
572	const graphene_simd4f_t __three = graphene_simd4f_init (`3.0f`, `3.0f`, `3.0f`, `3.0f`);
573	graphene_simd4f_t __s = _mm_rsqrt_ps (v);
574	return graphene_simd4f_mul (graphene_simd4f_mul (__s, __half),
575	graphene_simd4f_sub (__three,
576	graphene_simd4f_mul (__s, graphene_simd4f_mul (v, __s))));
577	}
578
579	#define graphene_simd4f_cross3(a,b) \
580	_simd4f_cross3(a,b)
581
582	static inline graphene_simd4f_t
583	_simd4f_cross3 (const graphene_simd4f_t a,
584	const graphene_simd4f_t b)
585	{
586	const graphene_simd4f_t __a_yzx = _mm_shuffle_ps (a, a, _MM_SHUFFLE (`3`, `0`, `2`, `1`));
587	const graphene_simd4f_t __a_zxy = _mm_shuffle_ps (a, a, _MM_SHUFFLE (`3`, `1`, `0`, `2`));
588	const graphene_simd4f_t __b_yzx = _mm_shuffle_ps (b, b, _MM_SHUFFLE (`3`, `0`, `2`, `1`));
589	const graphene_simd4f_t __b_zxy = _mm_shuffle_ps (b, b, _MM_SHUFFLE (`3`, `1`, `0`, `2`));
590
591	return _mm_sub_ps (_mm_mul_ps (__a_yzx, __b_zxy), _mm_mul_ps (__a_zxy, __b_yzx));
592	}
593
594	#define graphene_simd4f_dot3(a,b) \
595	_simd4f_dot3(a,b)
596
597	static inline graphene_simd4f_t
598	_simd4f_dot3 (const graphene_simd4f_t a,
599	const graphene_simd4f_t b)
600	{
601	#if defined(GRAPHENE_USE_SSE4_1)
602	return _mm_dp_ps (a, b, `0x7f`);
603	#else
604	GRAPHENE_ALIGN16 const unsigned int __mask_bits[] = { `0xffffffff`, `0xffffffff`, `0xffffffff`, `0` };
605	const graphene_simd4f_t __mask = _mm_load_ps ((const float *) __mask_bits);
606	const graphene_simd4f_t __m = _mm_mul_ps ((a), (b));
607	const graphene_simd4f_t __s0 = _mm_and_ps (__m, __mask);
608	const graphene_simd4f_t __s1 = _mm_add_ps (__s0, _mm_movehl_ps (__s0, __s0));
609	const graphene_simd4f_t __s2 = _mm_add_ss (__s1, _mm_shuffle_ps (__s1, __s1, `1`));
610
611	return _mm_shuffle_ps (__s2, __s2, `0`);
612	#endif
613	}
614
615	#define graphene_simd4f_dot3_scalar(a,b) \
616	_simd4f_dot3_scalar(a,b)
617
618	static inline float
619	_simd4f_dot3_scalar (const graphene_simd4f_t a,
620	const graphene_simd4f_t b)
621	{
622	float __res;
623	_mm_store_ss (&__res, graphene_simd4f_dot3 (a, b));
624	return __res;
625	}
626
/* Lane-wise minimum / maximum of a and b. */
#define graphene_simd4f_min(a,b) \
  (_mm_min_ps ((a), (b)))

#define graphene_simd4f_max(a,b) \
  (_mm_max_ps ((a), (b)))


/* Lane rotations: the suffix names the source lanes that end up in
 * lanes 0, 1, 2, 3 of the result. */
#define graphene_simd4f_shuffle_wxyz(v) \
  (_mm_shuffle_ps ((v), (v), _MM_SHUFFLE (2, 1, 0, 3)))

#define graphene_simd4f_shuffle_zwxy(v) \
  (_mm_shuffle_ps ((v), (v), _MM_SHUFFLE (1, 0, 3, 2)))

#define graphene_simd4f_shuffle_yzwx(v) \
  (_mm_shuffle_ps ((v), (v), _MM_SHUFFLE (0, 3, 2, 1)))

/* Returns { x, y, z, 0 }. */
#define graphene_simd4f_zero_w(v) \
  (_mm_movelh_ps ((v), _mm_unpackhi_ps ((v), _mm_setzero_ps ())))

/* Returns { x, y, 0, 0 }. */
#define graphene_simd4f_zero_zw(v) \
  (_mm_movelh_ps ((v), _mm_setzero_ps ()))

/* Returns { s.x, s.y, s.z, v }. */
#define graphene_simd4f_merge_w(s,v) \
  (_mm_movelh_ps ((s), _mm_unpackhi_ps ((s), _mm_set1_ps ((v)))))

/* Returns { a.z, a.w, b.z, b.w }. */
#define graphene_simd4f_merge_high(a,b) \
  (_mm_movehl_ps ((b), (a)))

/* Returns { a.x, a.y, b.x, b.y }. */
#define graphene_simd4f_merge_low(a,b) \
  (_mm_movelh_ps ((a), (b)))
657
658	typedef GRAPHENE_ALIGN16 union {
659	unsigned int ui[`4`];
660	float f[`4`];
661	} graphene_simd4f_uif_t;
662
663	#define graphene_simd4f_flip_sign_0101(v) _simd4f_flip_sign_0101(v)
664
665	static inline graphene_simd4f_t
666	_simd4f_flip_sign_0101 (const graphene_simd4f_t v)
667	{
668	const graphene_simd4f_uif_t __pnpn = { {
669	`0x00000000`,
670	`0x80000000`,
671	`0x00000000`,
672	`0x80000000`
673	} };
674
675	return _mm_xor_ps (v, _mm_load_ps (__pnpn.f));
676	}
677
678	#define graphene_simd4f_flip_sign_1010(v) _simd4f_flip_sign_1010(v)
679
680	static inline graphene_simd4f_t
681	_simd4f_flip_sign_1010(const graphene_simd4f_t v)
682	{
683	const graphene_simd4f_uif_t __npnp = { {
684	`0x80000000`,
685	`0x00000000`,
686	`0x80000000`,
687	`0x00000000`,
688	} };
689
690	return _mm_xor_ps (v, _mm_load_ps (__npnp.f));
691	}
692
693	#define graphene_simd4f_cmp_eq(a,b) _simd4f_cmp_eq(a,b)
694
695	static inline bool
696	_simd4f_cmp_eq (const graphene_simd4f_t a,
697	const graphene_simd4f_t b)
698	{
699	__m128i __res = _mm_castps_si128 (_mm_cmpneq_ps (a, b));
700	return (_mm_movemask_epi8 (__res) == `0`);
701	}
702
703	#define graphene_simd4f_cmp_neq(a,b) _simd4f_cmp_neq(a,b)
704
705	static inline bool
706	_simd4f_cmp_neq (const graphene_simd4f_t a,
707	const graphene_simd4f_t b)
708	{
709	__m128i __res = _mm_castps_si128 (_mm_cmpneq_ps (a, b));
710	return (_mm_movemask_epi8 (__res) != `0`);
711	}
712
713	#define graphene_simd4f_cmp_lt(a,b) _simd4f_cmp_lt(a,b)
714
715	static inline bool
716	_simd4f_cmp_lt (const graphene_simd4f_t a,
717	const graphene_simd4f_t b)
718	{
719	__m128i __res = _mm_castps_si128 (_mm_cmplt_ps (a, b));
720	return (_mm_movemask_epi8 (__res) == `0xffff`);
721	}
722
723	#define graphene_simd4f_cmp_le(a,b) _simd4f_cmp_le(a,b)
724
725	static inline bool
726	_simd4f_cmp_le (const graphene_simd4f_t a,
727	const graphene_simd4f_t b)
728	{
729	__m128i __res = _mm_castps_si128 (_mm_cmple_ps (a, b));
730	return (_mm_movemask_epi8 (__res) == `0xffff`);
731	}
732
733	#define graphene_simd4f_cmp_ge(a,b) _simd4f_cmp_ge(a,b)
734
735	static inline bool
736	_simd4f_cmp_ge (const graphene_simd4f_t a,
737	const graphene_simd4f_t b)
738	{
739	__m128i __res = _mm_castps_si128 (_mm_cmpge_ps (a, b));
740	return (_mm_movemask_epi8 (__res) == `0xffff`);
741	}
742
743	#define graphene_simd4f_cmp_gt(a,b) _simd4f_cmp_gt(a,b)
744
745	static inline bool
746	_simd4f_cmp_gt (const graphene_simd4f_t a,
747	const graphene_simd4f_t b)
748	{
749	__m128i __res = _mm_castps_si128 (_mm_cmpgt_ps (a, b));
750	return (_mm_movemask_epi8 (__res) == `0xffff`);
751	}
752
753	#define graphene_simd4f_neg(s) _simd4f_neg(s)
754
755	static inline graphene_simd4f_t
756	_simd4f_neg (const graphene_simd4f_t s)
757	{
758	const graphene_simd4f_uif_t __mask = { {
759	`0x80000000`,
760	`0x80000000`,
761	`0x80000000`,
762	`0x80000000`,
763	} };
764
765	return _mm_xor_ps (s, _mm_load_ps (__mask.f));
766	}
767
768	#else /* SSE intrinsics-not GCC or Visual Studio */
769
770	# error "Need GCC-compatible or Visual Studio compiler for SSE extensions."
771
772	/* Use static inline to inline all these functions */
773
774	# endif /* !__GNUC__ && !_MSC_VER */
775
776	#elif !defined(__GI_SCANNER__) && defined(GRAPHENE_USE_GCC)
777
778	/* GCC vector intrinsic implementation of SIMD 4f */
779
/* Four-lane integer vector, 16 bytes wide; used below for shuffle masks and
 * for the results of lane-wise comparisons. */
typedef int graphene_simd4i_t __attribute__((vector_size (16)));
781
/* Builds a vector whose lanes 0..3 hold x, y, z and w. */
# define graphene_simd4f_init(x,y,z,w) \
  (__extension__ ({ \
    (graphene_simd4f_t) { (x), (y), (z), (w) }; \
  }))

/* Returns a vector with every lane set to zero. */
# define graphene_simd4f_init_zero() \
  (__extension__ ({ \
    (graphene_simd4f_t) { 0.f, 0.f, 0.f, 0.f }; \
  }))

/* The three loaders below capture the pointer once, so an argument with
 * side effects (e.g. `p++`) is evaluated a single time. */

/* Loads v[0..3] into lanes 0..3. */
# define graphene_simd4f_init_4f(v) \
  (__extension__ ({ \
    const float *__gs4f_init_4f_v = (v); \
    (graphene_simd4f_t) { \
      __gs4f_init_4f_v[0], __gs4f_init_4f_v[1], __gs4f_init_4f_v[2], __gs4f_init_4f_v[3] \
    }; \
  }))

/* Loads v[0..2] into lanes 0..2 and zeroes lane 3. */
# define graphene_simd4f_init_3f(v) \
  (__extension__ ({ \
    const float *__gs4f_init_3f_v = (v); \
    (graphene_simd4f_t) { \
      __gs4f_init_3f_v[0], __gs4f_init_3f_v[1], __gs4f_init_3f_v[2], 0.f \
    }; \
  }))

/* Loads v[0..1] into lanes 0..1 and zeroes lanes 2..3. */
# define graphene_simd4f_init_2f(v) \
  (__extension__ ({ \
    const float *__gs4f_init_2f_v = (v); \
    (graphene_simd4f_t) { __gs4f_init_2f_v[0], __gs4f_init_2f_v[1], 0.f, 0.f }; \
  }))
806
/* Copy the first 4, 3 or 2 lanes of s into the float array v.
 * s is copied into a local first, so it may be any expression (not only an
 * lvalue) and is evaluated exactly once.
 */
# define graphene_simd4f_dup_4f(s,v) \
  (__extension__ ({ \
    const graphene_simd4f_t __gs4f_dup_4f_s = (s); \
    memcpy ((v), &__gs4f_dup_4f_s, sizeof (float) * 4); \
  }))

# define graphene_simd4f_dup_3f(s,v) \
  (__extension__ ({ \
    const graphene_simd4f_t __gs4f_dup_3f_s = (s); \
    memcpy ((v), &__gs4f_dup_3f_s, sizeof (float) * 3); \
  }))

# define graphene_simd4f_dup_2f(s,v) \
  (__extension__ ({ \
    const graphene_simd4f_t __gs4f_dup_2f_s = (s); \
    memcpy ((v), &__gs4f_dup_2f_s, sizeof (float) * 2); \
  }))
821
/* Reads lane i of s using vector subscripting. */
# define graphene_simd4f_get(s,i)      ((float) (s)[(i)])

/* Per-lane shorthands: x, y, z, w are lanes 0, 1, 2, 3. */
# define graphene_simd4f_get_x(s)      graphene_simd4f_get ((s), 0)
# define graphene_simd4f_get_y(s)      graphene_simd4f_get ((s), 1)
# define graphene_simd4f_get_z(s)      graphene_simd4f_get ((s), 2)
# define graphene_simd4f_get_w(s)      graphene_simd4f_get ((s), 3)
827
/* Broadcasts the scalar v to all four lanes. The scalar is captured once,
 * so an argument with side effects is evaluated a single time. */
# define graphene_simd4f_splat(v) \
  (__extension__ ({ \
    const float __gs4f_splat_val = (v); \
    (graphene_simd4f_t) { \
      __gs4f_splat_val, __gs4f_splat_val, __gs4f_splat_val, __gs4f_splat_val \
    }; \
  }))

/* Broadcast lane 0 (x) of the vector v to all four lanes. */
# define graphene_simd4f_splat_x(v) \
  (__extension__ ({ \
    float __val = graphene_simd4f_get_x ((v)); \
    (graphene_simd4f_t) { __val, __val, __val, __val }; \
  }))

/* Broadcast lane 1 (y) of the vector v to all four lanes. */
# define graphene_simd4f_splat_y(v) \
  (__extension__ ({ \
    float __val = graphene_simd4f_get_y ((v)); \
    (graphene_simd4f_t) { __val, __val, __val, __val }; \
  }))

/* Broadcast lane 2 (z) of the vector v to all four lanes. */
# define graphene_simd4f_splat_z(v) \
  (__extension__ ({ \
    float __val = graphene_simd4f_get_z ((v)); \
    (graphene_simd4f_t) { __val, __val, __val, __val }; \
  }))

/* Broadcast lane 3 (w) of the vector v to all four lanes. */
# define graphene_simd4f_splat_w(v) \
  (__extension__ ({ \
    float __val = graphene_simd4f_get_w ((v)); \
    (graphene_simd4f_t) { __val, __val, __val, __val }; \
  }))
856
/* The three macros below capture their argument once, so an expression with
 * side effects is evaluated a single time instead of once per use. */

/* Lane-wise 1 / v; a lane equal to zero yields 0 instead of dividing. */
# define graphene_simd4f_reciprocal(v) \
  (__extension__ ({ \
    const graphene_simd4f_t __gs4f_rcp_v = (v); \
    (graphene_simd4f_t) { \
      __gs4f_rcp_v[0] != 0.f ? 1.f / __gs4f_rcp_v[0] : 0.f, \
      __gs4f_rcp_v[1] != 0.f ? 1.f / __gs4f_rcp_v[1] : 0.f, \
      __gs4f_rcp_v[2] != 0.f ? 1.f / __gs4f_rcp_v[2] : 0.f, \
      __gs4f_rcp_v[3] != 0.f ? 1.f / __gs4f_rcp_v[3] : 0.f, \
    }; \
  }))

/* Lane-wise sqrtf (v). */
# define graphene_simd4f_sqrt(v) \
  (__extension__ ({ \
    const graphene_simd4f_t __gs4f_sqrt_v = (v); \
    (graphene_simd4f_t) { \
      sqrtf (__gs4f_sqrt_v[0]), \
      sqrtf (__gs4f_sqrt_v[1]), \
      sqrtf (__gs4f_sqrt_v[2]), \
      sqrtf (__gs4f_sqrt_v[3]), \
    }; \
  }))

/* Lane-wise 1 / sqrtf (v); a lane equal to zero yields 0 instead of dividing. */
# define graphene_simd4f_rsqrt(v) \
  (__extension__ ({ \
    const graphene_simd4f_t __gs4f_rsqrt_v = (v); \
    (graphene_simd4f_t) { \
      __gs4f_rsqrt_v[0] != 0.f ? 1.f / sqrtf (__gs4f_rsqrt_v[0]) : 0.f, \
      __gs4f_rsqrt_v[1] != 0.f ? 1.f / sqrtf (__gs4f_rsqrt_v[1]) : 0.f, \
      __gs4f_rsqrt_v[2] != 0.f ? 1.f / sqrtf (__gs4f_rsqrt_v[2]) : 0.f, \
      __gs4f_rsqrt_v[3] != 0.f ? 1.f / sqrtf (__gs4f_rsqrt_v[3]) : 0.f, \
    }; \
  }))
886
/* Lane-wise arithmetic using the compiler's vector operators. */
# define graphene_simd4f_add(a,b)      ((graphene_simd4f_t) ((a) + (b)))
# define graphene_simd4f_sub(a,b)      ((graphene_simd4f_t) ((a) - (b)))
# define graphene_simd4f_mul(a,b)      ((graphene_simd4f_t) ((a) * (b)))
# define graphene_simd4f_div(a,b)      ((graphene_simd4f_t) ((a) / (b)))
891
/* Cross product of the x, y, z lanes of a and b; lane 3 of the result is 0. */
# define graphene_simd4f_cross3(a,b) \
  (__extension__ ({ \
    const graphene_simd4f_t __a = (a); \
    const graphene_simd4f_t __b = (b); \
    (graphene_simd4f_t) { \
      __a[1] * __b[2] - __a[2] * __b[1], \
      __a[2] * __b[0] - __a[0] * __b[2], \
      __a[0] * __b[1] - __a[1] * __b[0], \
      0.f \
    }; \
  }))

/* Dot product of the x, y, z lanes of a and b, broadcast to all four lanes. */
# define graphene_simd4f_dot3(a,b) \
  (__extension__ ({ \
    const graphene_simd4f_t __a = (a); \
    const graphene_simd4f_t __b = (b); \
    const float __res = __a[0] * __b[0] + __a[1] * __b[1] + __a[2] * __b[2]; \
    (graphene_simd4f_t) { __res, __res, __res, __res }; \
  }))

/* Same as graphene_simd4f_dot3(), but returns lane 0 as a plain float. */
# define graphene_simd4f_dot3_scalar(a,b) \
  ((float) graphene_simd4f_dot3 (a, b)[0])
914
/* Lane-wise minimum: each lane takes a's value when it is strictly smaller
 * than b's, otherwise b's. */
# define graphene_simd4f_min(a,b) \
  (__extension__ ({ \
    const graphene_simd4f_t __a = (a); \
    const graphene_simd4f_t __b = (b); \
    (graphene_simd4f_t) { \
      __a[0] < __b[0] ? __a[0] : __b[0], \
      __a[1] < __b[1] ? __a[1] : __b[1], \
      __a[2] < __b[2] ? __a[2] : __b[2], \
      __a[3] < __b[3] ? __a[3] : __b[3] \
    }; \
  }))

/* Lane-wise maximum: each lane takes a's value when it is strictly greater
 * than b's, otherwise b's. */
# define graphene_simd4f_max(a,b) \
  (__extension__ ({ \
    const graphene_simd4f_t __a = (a); \
    const graphene_simd4f_t __b = (b); \
    (graphene_simd4f_t) { \
      __a[0] > __b[0] ? __a[0] : __b[0], \
      __a[1] > __b[1] ? __a[1] : __b[1], \
      __a[2] > __b[2] ? __a[2] : __b[2], \
      __a[3] > __b[3] ? __a[3] : __b[3] \
    }; \
  }))
934
/* Lane rotations built on __builtin_shuffle(): each mask entry is the index
 * of the source lane copied into that position of the result. */

/* { x, y, z, w } -> { w, x, y, z } */
# define graphene_simd4f_shuffle_wxyz(v) \
  ((graphene_simd4f_t) __builtin_shuffle ((v), (graphene_simd4i_t) { 3, 0, 1, 2 }))

/* { x, y, z, w } -> { z, w, x, y } */
# define graphene_simd4f_shuffle_zwxy(v) \
  ((graphene_simd4f_t) __builtin_shuffle ((v), (graphene_simd4i_t) { 2, 3, 0, 1 }))

/* { x, y, z, w } -> { y, z, w, x } */
# define graphene_simd4f_shuffle_yzwx(v) \
  ((graphene_simd4f_t) __builtin_shuffle ((v), (graphene_simd4i_t) { 1, 2, 3, 0 }))
952
/* Two-input shuffles: mask indices 0..3 pick lanes from the first vector,
 * 4..7 pick lanes from the second. */

/* Returns { x, y, z, 0 }. */
# define graphene_simd4f_zero_w(v) \
  ((graphene_simd4f_t) __builtin_shuffle ((v), graphene_simd4f_init_zero (), \
                                          (graphene_simd4i_t) { 0, 1, 2, 4 }))

/* Returns { x, y, 0, 0 }. */
# define graphene_simd4f_zero_zw(v) \
  ((graphene_simd4f_t) __builtin_shuffle ((v), graphene_simd4f_init_zero (), \
                                          (graphene_simd4i_t) { 0, 1, 4, 4 }))

/* Returns { s.x, s.y, s.z, v }. */
# define graphene_simd4f_merge_w(s,v) \
  ((graphene_simd4f_t) __builtin_shuffle ((s), graphene_simd4f_splat ((v)), \
                                          (graphene_simd4i_t) { 0, 1, 2, 4 }))

/* Returns { a.z, a.w, b.z, b.w }. */
# define graphene_simd4f_merge_high(a,b) \
  ((graphene_simd4f_t) __builtin_shuffle ((a), (b), (graphene_simd4i_t) { 2, 3, 6, 7 }))

/* Returns { a.x, a.y, b.x, b.y }. */
# define graphene_simd4f_merge_low(a,b) \
  ((graphene_simd4f_t) __builtin_shuffle ((a), (b), (graphene_simd4i_t) { 0, 1, 4, 5 }))
982
/* Negates lanes 1 and 3 (y and w); lanes 0 and 2 are copied unchanged. */
# define graphene_simd4f_flip_sign_0101(v) \
  (__extension__ ({ \
    const graphene_simd4f_t __v = (v); \
    (graphene_simd4f_t) { __v[0], -__v[1], __v[2], -__v[3] }; \
  }))

/* Negates lanes 0 and 2 (x and z); lanes 1 and 3 are copied unchanged. */
# define graphene_simd4f_flip_sign_1010(v) \
  (__extension__ ({ \
    const graphene_simd4f_t __v = (v); \
    (graphene_simd4f_t) { -__v[0], __v[1], -__v[2], __v[3] }; \
  }))
994
/* Vector comparisons: the relational operator yields an integer vector
 * with one entry per lane; each macro evaluates to true only when the
 * entry of every one of the four lanes is non-zero.
 */
# define graphene_simd4f_cmp_eq(a,b) \
  (__extension__ ({ \
    const graphene_simd4i_t __lanes_eq = (a) == (b); \
    (bool) (__lanes_eq[0] && __lanes_eq[1] && __lanes_eq[2] && __lanes_eq[3]); \
  }))

/* True as soon as one lane differs: the negation of cmp_eq */
# define graphene_simd4f_cmp_neq(a,b) (!graphene_simd4f_cmp_eq (a,b))

# define graphene_simd4f_cmp_lt(a,b) \
  (__extension__ ({ \
    const graphene_simd4i_t __lanes_lt = (a) < (b); \
    (bool) (__lanes_lt[0] && __lanes_lt[1] && __lanes_lt[2] && __lanes_lt[3]); \
  }))

# define graphene_simd4f_cmp_le(a,b) \
  (__extension__ ({ \
    const graphene_simd4i_t __lanes_le = (a) <= (b); \
    (bool) (__lanes_le[0] && __lanes_le[1] && __lanes_le[2] && __lanes_le[3]); \
  }))

# define graphene_simd4f_cmp_ge(a,b) \
  (__extension__ ({ \
    const graphene_simd4i_t __lanes_ge = (a) >= (b); \
    (bool) (__lanes_ge[0] && __lanes_ge[1] && __lanes_ge[2] && __lanes_ge[3]); \
  }))

# define graphene_simd4f_cmp_gt(a,b) \
  (__extension__ ({ \
    const graphene_simd4i_t __lanes_gt = (a) > (b); \
    (bool) (__lanes_gt[0] && __lanes_gt[1] && __lanes_gt[2] && __lanes_gt[3]); \
  }))
1041
/* Negates every lane by multiplying the vector with a splat of -1 */
# define graphene_simd4f_neg(s) \
  (__extension__ ({ \
    const graphene_simd4f_t __s = (s); \
    graphene_simd4f_mul (__s, graphene_simd4f_splat (-1.f)); \
  }))
1048
1049	#elif !defined(__GI_SCANNER__) && defined(GRAPHENE_USE_ARM_NEON)
1050
/* ARM Neon implementation of SIMD4f */
1052
1053	/ Union type used for single lane reading without memcpy /
1054	typedef union {
1055	graphene_simd4f_t s;
1056	float f[`4`];
1057	} graphene_simd4f_union_t;
1058
/* NEON has optimised 2-lanes vectors we can use */
typedef float32x2_t graphene_simd2f_t;
1061
1062	#ifdef __GNUC__
/* Builds a vector out of four scalars: they are written to a temporary
 * array which is then loaded with a single vld1q_f32().
 */
# define graphene_simd4f_init(x,y,z,w) \
  (__extension__ ({ \
    const float32_t __init_lanes[4] = { (x), (y), (z), (w) }; \
    (graphene_simd4f_t) vld1q_f32 (__init_lanes); \
  }))

/* Vector with all four lanes set to 0.f */
# define graphene_simd4f_init_zero() \
  ((graphene_simd4f_t) vdupq_n_f32 (0.f))
1073
/* Loads four consecutive floats starting at @v */
# define graphene_simd4f_init_4f(v) \
  (__extension__ ({ \
    const float32_t *__v32 = (const float32_t *) (v); \
    (graphene_simd4f_t) vld1q_f32 (__v32); \
  }))

/* Loads three floats from @v and sets the w lane to 0.f; @v is
 * evaluated once and only indexed through the local pointer.
 */
# define graphene_simd4f_init_3f(v) \
  (__extension__ ({ \
    const float *__src3 = (v); \
    graphene_simd4f_init (__src3[0], __src3[1], __src3[2], 0.f); \
  }))

/* Loads two floats from @v into the low half of the vector and fills
 * the high half (z and w) with zeros.
 */
# define graphene_simd4f_init_2f(v) \
  (__extension__ ({ \
    const float32_t *__v32 = (const float32_t *) (v); \
    const graphene_simd2f_t __low = vld1_f32 (__v32); \
    const float32_t __zero = 0; \
    const graphene_simd2f_t __high = vld1_dup_f32 (&__zero); \
    (graphene_simd4f_t) vcombine_f32 (__low, __high); \
  }))
1093
/* Stores all four lanes of @s into the float array @v */
# define graphene_simd4f_dup_4f(s,v) \
  (__extension__ ({ \
    vst1q_f32 ((float32_t *) (v), (s)); \
  }))

/* Stores lanes 0, 1 and 2 of @s into @v[0], @v[1] and @v[2]; both
 * arguments are captured once so that an argument expression is not
 * repeated for every lane.
 */
# define graphene_simd4f_dup_3f(s,v) \
  (__extension__ ({ \
    float *__dup3_dst = (v); \
    const graphene_simd4f_t __dup3_src = (s); \
    vst1q_lane_f32 (__dup3_dst, __dup3_src, 0); \
    vst1q_lane_f32 (__dup3_dst + 1, __dup3_src, 1); \
    vst1q_lane_f32 (__dup3_dst + 2, __dup3_src, 2); \
  }))

/* Stores the low half of @s (lanes 0 and 1) into @v */
# define graphene_simd4f_dup_2f(s,v) \
  (__extension__ ({ \
    const graphene_simd2f_t __dup2_low = vget_low_f32 ((s)); \
    vst1_f32 ((float32_t *) (v), __dup2_low); \
  }))
1112
/* Reads lane @i of the vector @s as a float */
# define graphene_simd4f_get(s,i) \
  ((float) vgetq_lane_f32 ((s), (i)))

/* Vector with all four lanes set to the scalar @v */
# define graphene_simd4f_splat(v) \
  ((graphene_simd4f_t) vdupq_n_f32 ((v)))

/* Broadcast a single lane of @s to all four lanes */
# define graphene_simd4f_splat_x(s) \
  (graphene_simd4f_splat (graphene_simd4f_get_x ((s))))

# define graphene_simd4f_splat_y(s) \
  (graphene_simd4f_splat (graphene_simd4f_get_y ((s))))

# define graphene_simd4f_splat_z(s) \
  (graphene_simd4f_splat (graphene_simd4f_get_z ((s))))

# define graphene_simd4f_splat_w(s) \
  (graphene_simd4f_splat (graphene_simd4f_get_w ((s))))
1142
/* Per-lane reciprocal: starts from the vrecpeq_f32() estimate and
 * applies two vrecpsq_f32() refinement steps. The argument is captured
 * once instead of being expanded in each of the three uses.
 */
# define graphene_simd4f_reciprocal(s) \
  (__extension__ ({ \
    const graphene_simd4f_t __recip_in = (s); \
    graphene_simd4f_t __recip_est = vrecpeq_f32 (__recip_in); \
    __recip_est = vmulq_f32 (vrecpsq_f32 (__recip_est, __recip_in), __recip_est); \
    (graphene_simd4f_t) vmulq_f32 (vrecpsq_f32 (__recip_est, __recip_in), __recip_est); \
  }))

/* Per-lane sum of @a and @b */
# define graphene_simd4f_add(a,b) \
  ((graphene_simd4f_t) vaddq_f32 ((a), (b)))

/* Per-lane difference of @a and @b */
# define graphene_simd4f_sub(a,b) \
  ((graphene_simd4f_t) vsubq_f32 ((a), (b)))

/* Per-lane product of @a and @b */
# define graphene_simd4f_mul(a,b) \
  ((graphene_simd4f_t) vmulq_f32 ((a), (b)))

/* Per-lane division, computed as @a times the reciprocal of @b */
# define graphene_simd4f_div(a,b) \
  (__extension__ ({ \
    const graphene_simd4f_t __div_rec = graphene_simd4f_reciprocal ((b)); \
    (graphene_simd4f_t) vmulq_f32 ((a), __div_rec); \
  }))
1170
/* One refinement step of the reciprocal square root of @v, starting
 * from @estimate. Both arguments are captured once: @estimate is used
 * three times in the body.
 */
# define _simd4f_rsqrt_iter(v,estimate) \
  (__extension__ ({ \
    const graphene_simd4f_t __it_v = (v); \
    const graphene_simd4f_t __it_est = (estimate); \
    const graphene_simd4f_t __est1 = vmulq_f32 (__it_est, __it_v); \
    (graphene_simd4f_t) vmulq_f32 (__it_est, vrsqrtsq_f32 (__est1, __it_est)); \
  }))

/* Per-lane reciprocal square root: the vrsqrteq_f32() estimate refined
 * by three iterations of _simd4f_rsqrt_iter(). @s is evaluated once.
 */
# define graphene_simd4f_rsqrt(s) \
  (__extension__ ({ \
    const graphene_simd4f_t __rsqrt_in = (s); \
    graphene_simd4f_t __estimate = vrsqrteq_f32 (__rsqrt_in); \
    __estimate = _simd4f_rsqrt_iter (__rsqrt_in, __estimate); \
    __estimate = _simd4f_rsqrt_iter (__rsqrt_in, __estimate); \
    _simd4f_rsqrt_iter (__rsqrt_in, __estimate); \
  }))

/* Per-lane square root, computed as the reciprocal of the reciprocal
 * square root. vtstq_u32 (bits, bits) is all-ones only for lanes whose
 * input bit pattern is not zero, so the final AND forces the lanes with
 * an all-zero input to an all-zero result. @s is evaluated once.
 */
# define graphene_simd4f_sqrt(s) \
  (__extension__ ({ \
    const graphene_simd4f_t __sqrt_in = (s); \
    const graphene_simd4f_t __rsq = graphene_simd4f_rsqrt (__sqrt_in); \
    const graphene_simd4f_t __rrsq = graphene_simd4f_reciprocal (__rsq); \
    const uint32x4_t __sqrt_bits = vreinterpretq_u32_f32 (__sqrt_in); \
    (graphene_simd4f_t) vreinterpretq_f32_u32 (vandq_u32 (vtstq_u32 (__sqrt_bits, __sqrt_bits), vreinterpretq_u32_f32 (__rrsq))); \
  }))
1192
/* Cross product of the x, y, z lanes of @a and @b; the w lane of the
 * result is cleared by the final AND with { ~0, ~0, ~0, 0 }.
 *
 * Both operands are rotated to { y, z, x, y }; the difference of the
 * products has the components in z, x, y order, so it is rotated the
 * same way once more before masking.
 */
# define graphene_simd4f_cross3(a,b) \
  (__extension__ ({ \
    const uint32_t __xyz_bits[] = { 0xffffffff, 0xffffffff, 0xffffffff, 0 }; \
    const int32x4_t __xyz_mask = vld1q_s32 ((const int32_t *) __xyz_bits); \
    const graphene_simd4f_t __lhs = (a); \
    const graphene_simd4f_t __rhs = (b); \
    const graphene_simd2f_t __lhs_xy = vget_low_f32 (__lhs); \
    const graphene_simd2f_t __rhs_xy = vget_low_f32 (__rhs); \
    const graphene_simd4f_t __lhs_yzx = \
      vcombine_f32 (vext_f32 (__lhs_xy, vget_high_f32 (__lhs), 1), __lhs_xy); \
    const graphene_simd4f_t __rhs_yzx = \
      vcombine_f32 (vext_f32 (__rhs_xy, vget_high_f32 (__rhs), 1), __rhs_xy); \
    graphene_simd4f_t __zxy = \
      graphene_simd4f_sub (graphene_simd4f_mul (__rhs_yzx, __lhs), \
                           graphene_simd4f_mul (__lhs_yzx, __rhs)); \
    const graphene_simd2f_t __zxy_low = vget_low_f32 (__zxy); \
    __zxy = vcombine_f32 (vext_f32 (__zxy_low, vget_high_f32 (__zxy), 1), __zxy_low); \
    (graphene_simd4f_t) vandq_s32 ((int32x4_t) __zxy, __xyz_mask); \
  }))
1208
/* Dot product of the x, y, z lanes, broadcast to all four lanes */
# define graphene_simd4f_dot3(a,b) \
  (graphene_simd4f_splat (graphene_simd4f_dot3_scalar (a, b)))

/* Dot product of the x, y, z lanes as a float: the pairwise add puts
 * x*x' + y*y' in lane 0, the add with the high half then contributes
 * z*z', and lane 0 is returned.
 */
# define graphene_simd4f_dot3_scalar(a,b) \
  (__extension__ ({ \
    const graphene_simd4f_t __prod = graphene_simd4f_mul (a, b); \
    const graphene_simd2f_t __prod_xy = vget_low_f32 (__prod); \
    const graphene_simd2f_t __x_plus_y = vpadd_f32 (__prod_xy, __prod_xy); \
    (float) vget_lane_f32 (vadd_f32 (__x_plus_y, vget_high_f32 (__prod)), 0); \
  }))
1220
/* Per-lane minimum of @a and @b */
# define graphene_simd4f_min(a,b) \
  ((graphene_simd4f_t) vminq_f32 ((a), (b)))

/* Per-lane maximum of @a and @b */
# define graphene_simd4f_max(a,b) \
  ((graphene_simd4f_t) vmaxq_f32 (a, b))
1230
/* Lane rotations: the vector is copied into a union so that its lanes
 * can be read as floats, then rebuilt in the requested order.
 */
# define graphene_simd4f_shuffle_wxyz(v) \
  (__extension__ ({ \
    graphene_simd4f_union_t __shuf = { (v) }; \
    graphene_simd4f_init (__shuf.f[3], __shuf.f[0], __shuf.f[1], __shuf.f[2]); \
  }))

# define graphene_simd4f_shuffle_zwxy(v) \
  (__extension__ ({ \
    graphene_simd4f_union_t __shuf = { (v) }; \
    graphene_simd4f_init (__shuf.f[2], __shuf.f[3], __shuf.f[0], __shuf.f[1]); \
  }))

# define graphene_simd4f_shuffle_yzwx(v) \
  (__extension__ ({ \
    graphene_simd4f_union_t __shuf = { (v) }; \
    graphene_simd4f_init (__shuf.f[1], __shuf.f[2], __shuf.f[3], __shuf.f[0]); \
  }))
1248
/* Copy of @v with the w lane replaced by 0.f */
# define graphene_simd4f_zero_w(v) \
  (__extension__ ({ \
    graphene_simd4f_union_t __src = { (v) }; \
    graphene_simd4f_init (__src.f[0], __src.f[1], __src.f[2], 0.f); \
  }))

/* Copy of @v with the z and w lanes replaced by 0.f */
# define graphene_simd4f_zero_zw(v) \
  (__extension__ ({ \
    graphene_simd4f_union_t __src = { (v) }; \
    graphene_simd4f_init (__src.f[0], __src.f[1], 0.f, 0.f); \
  }))

/* Copy of @s with the w lane replaced by the scalar @v */
# define graphene_simd4f_merge_w(s,v) \
  (__extension__ ({ \
    graphene_simd4f_union_t __src = { (s) }; \
    graphene_simd4f_init (__src.f[0], __src.f[1], __src.f[2], (v)); \
  }))

/* Result is { a.z, a.w, b.z, b.w } */
# define graphene_simd4f_merge_high(a,b) \
  (__extension__ ({ \
    graphene_simd4f_union_t __first = { (a) }; \
    graphene_simd4f_union_t __second = { (b) }; \
    graphene_simd4f_init (__first.f[2], __first.f[3], __second.f[2], __second.f[3]); \
  }))

/* Result is { a.x, a.y, b.x, b.y } */
# define graphene_simd4f_merge_low(a,b) \
  (__extension__ ({ \
    graphene_simd4f_union_t __first = { (a) }; \
    graphene_simd4f_union_t __second = { (b) }; \
    graphene_simd4f_init (__first.f[0], __first.f[1], __second.f[0], __second.f[1]); \
  }))
1280
/* Flips the sign of the y and w lanes by XOR-ing their sign bit.
 * The mask array is declared as uint32_t, the element type that
 * vld1q_u32() takes a pointer to.
 */
# define graphene_simd4f_flip_sign_0101(s) \
  (__extension__ ({ \
    const uint32_t __upnpn[4] = { \
      0x00000000, \
      0x80000000, \
      0x00000000, \
      0x80000000 \
    }; \
    const uint32x4_t __pnpn = vld1q_u32 (__upnpn); \
    (graphene_simd4f_t) vreinterpretq_f32_u32 (veorq_u32 (vreinterpretq_u32_f32 ((s)), __pnpn)); \
  }))

/* Flips the sign of the x and z lanes by XOR-ing their sign bit */
# define graphene_simd4f_flip_sign_1010(s) \
  (__extension__ ({ \
    const uint32_t __unpnp[4] = { \
      0x80000000, \
      0x00000000, \
      0x80000000, \
      0x00000000 \
    }; \
    const uint32x4_t __npnp = vld1q_u32 (__unpnp); \
    (graphene_simd4f_t) vreinterpretq_f32_u32 (veorq_u32 (vreinterpretq_u32_f32 ((s)), __npnp)); \
  }))
1304
/* True when each of the four 32-bit lanes of the comparison mask @m is
 * non-zero. @m is expanded four times: pass a plain variable.
 */
# define _simd4f_mask_all(m) \
  (vgetq_lane_u32 ((m), 0) != 0 && \
   vgetq_lane_u32 ((m), 1) != 0 && \
   vgetq_lane_u32 ((m), 2) != 0 && \
   vgetq_lane_u32 ((m), 3) != 0)

/* Comparisons: true only if the relation holds for all four lanes */
# define graphene_simd4f_cmp_eq(a,b) \
  (__extension__ ({ \
    const uint32x4_t __eq_mask = vceqq_f32 ((a), (b)); \
    (bool) _simd4f_mask_all (__eq_mask); \
  }))

/* True if at least one lane of @a differs from the same lane of @b */
# define graphene_simd4f_cmp_neq(a,b) \
  (__extension__ ({ \
    const uint32x4_t __eq_mask = vceqq_f32 ((a), (b)); \
    (bool) (!_simd4f_mask_all (__eq_mask)); \
  }))

# define graphene_simd4f_cmp_lt(a,b) \
  (__extension__ ({ \
    const uint32x4_t __lt_mask = vcltq_f32 ((a), (b)); \
    (bool) _simd4f_mask_all (__lt_mask); \
  }))

# define graphene_simd4f_cmp_le(a,b) \
  (__extension__ ({ \
    const uint32x4_t __le_mask = vcleq_f32 ((a), (b)); \
    (bool) _simd4f_mask_all (__le_mask); \
  }))

# define graphene_simd4f_cmp_ge(a,b) \
  (__extension__ ({ \
    const uint32x4_t __ge_mask = vcgeq_f32 ((a), (b)); \
    (bool) _simd4f_mask_all (__ge_mask); \
  }))

# define graphene_simd4f_cmp_gt(a,b) \
  (__extension__ ({ \
    const uint32x4_t __gt_mask = vcgtq_f32 ((a), (b)); \
    (bool) _simd4f_mask_all (__gt_mask); \
  }))
1358
/* Negates every lane of @s. vnegq_f32() replaces the hand-built
 * 0x80000000 XOR mask, which was loaded from an `unsigned int` array
 * although vld1q_u32() takes a pointer to uint32_t.
 */
# define graphene_simd4f_neg(s) \
  ((graphene_simd4f_t) vnegq_f32 ((s)))
1370
1371	#elif defined _MSC_VER /* Visual Studio ARM */
1372
1373	# define graphene_simd4f_init(x,y,z,w) _simd4f_init(x,y,z,w)
1374	static inline graphene_simd4f_t
1375	_simd4f_init (float x, float y, float z, float w)
1376	{
1377	const float32_t __v[`4`] = { (x), (y), (z), (w) };
1378	return vld1q_f32 (__v);
1379	}
1380
1381	# define graphene_simd4f_init_zero() vdupq_n_f32 (0.f)
1382
1383	# define graphene_simd4f_init_4f(v) vld1q_f32 (v)
1384
1385	# define graphene_simd4f_init_3f(v) graphene_simd4f_init (v[0], v[1], v[2], 0.f)
1386
1387	# define graphene_simd4f_init_2f(v) _simd4f_init_2f(v)
1388	static inline graphene_simd4f_t
1389	_simd4f_init_2f (const float *v)
1390	{
1391	const float32_t __v32 = (const* float32_t *) (v);
1392	const graphene_simd2f_t __low = vld1_f32 (__v32);
1393	const float32_t __zero = `0`;
1394	const graphene_simd2f_t __high = vld1_dup_f32 (&__zero);
1395	return vcombine_f32 (__low, __high);
1396	}
1397
1398	# define graphene_simd4f_dup_4f(s,v) vst1q_f32 ((float32_t *) (v), (s))
1399
1400	# define graphene_simd4f_dup_3f(s,v) _simd4f_dup_3f(s,v)
1401	static inline
1402	void _simd4f_dup_3f (const graphene_simd4f_t s,
1403	float *v)
1404	{
1405	float *__v = (v);
1406	vst1q_lane_f32 (__v++, (s), `0`);
1407	vst1q_lane_f32 (__v++, (s), `1`);
1408	vst1q_lane_f32 (__v, (s), `2`);
1409	}
1410
1411	# define graphene_simd4f_dup_2f(s,v) vst1_f32 (v, vget_low_f32 (s))
1412
1413	# define graphene_simd4f_get(s,i) vgetq_lane_f32 ((s), (i))
1414
1415	# define graphene_simd4f_splat(v) vdupq_n_f32 ((v))
1416
1417	# define graphene_simd4f_splat_x(s) graphene_simd4f_splat (graphene_simd4f_get_x ((s)))
1418
1419	# define graphene_simd4f_splat_y(s) graphene_simd4f_splat (graphene_simd4f_get_y ((s)))
1420
1421	# define graphene_simd4f_splat_z(s) graphene_simd4f_splat (graphene_simd4f_get_z ((s)))
1422
1423	# define graphene_simd4f_splat_w(s) graphene_simd4f_splat (graphene_simd4f_get_w ((s)))
1424
1425	# define graphene_simd4f_reciprocal(s) _simd4f_reciprocal(s)
1426	static inline graphene_simd4f_t
1427	_simd4f_reciprocal (const graphene_simd4f_t s)
1428	{
1429	graphene_simd4f_t __est = vrecpeq_f32 ((s));
1430	__est = vmulq_f32 (vrecpsq_f32 (__est, (s)), __est);
1431	return vmulq_f32 (vrecpsq_f32 (__est, (s)), __est);
1432	}
1433
1434	# define graphene_simd4f_add(a,b) vaddq_f32 ((a), (b))
1435
1436	# define graphene_simd4f_sub(a,b) vsubq_f32 ((a), (b))
1437
1438	# define graphene_simd4f_mul(a,b) vmulq_f32 ((a), (b))
1439
1440	# define graphene_simd4f_div(a,b) vmulq_f32 (a, graphene_simd4f_reciprocal (b))
1441
1442	static inline graphene_simd4f_t
1443	_simd4f_rsqrt_iter (const graphene_simd4f_t v,
1444	const graphene_simd4f_t estimate)
1445	{
1446	const graphene_simd4f_t __est1 = vmulq_f32 ((estimate), (v));
1447	return vmulq_f32 ((estimate), vrsqrtsq_f32 (__est1, (estimate)));
1448	}
1449
1450	# define graphene_simd4f_rsqrt(s) _simd4f_rsqrt(s)
1451	static inline graphene_simd4f_t
1452	_simd4f_rsqrt (const graphene_simd4f_t s)
1453	{
1454	graphene_simd4f_t __estimate = vrsqrteq_f32 ((s));
1455	__estimate = _simd4f_rsqrt_iter ((s), __estimate);
1456	__estimate = _simd4f_rsqrt_iter ((s), __estimate);
1457	return _simd4f_rsqrt_iter ((s), __estimate);
1458	}
1459
1460	# define graphene_simd4f_sqrt(s) _simd4f_sqrt(s)
1461	static inline graphene_simd4f_t
1462	_simd4f_sqrt (const graphene_simd4f_t s)
1463	{
1464	graphene_simd4f_t __rsq = graphene_simd4f_rsqrt ((s));
1465	graphene_simd4f_t __rrsq = graphene_simd4f_reciprocal (__rsq);
1466	uint32x4_t __tmp = vreinterpretq_u32_f32 ((s)); \
1467	return vreinterpretq_f32_u32 (vandq_u32 (vtstq_u32 (__tmp, __tmp), vreinterpretq_u32_f32 (__rrsq)));
1468	}
1469
1470	# define graphene_simd4f_cross3(a,b) _simd4f_cross3(a,b)
1471	static inline graphene_simd4f_t
1472	_simd4f_cross3 (const graphene_simd4f_t a,
1473	const graphene_simd4f_t b)
1474	{
1475	const uint32_t __mask_bits[] = { `0xffffffff`, `0xffffffff`, `0xffffffff`, `0` };
1476	const int32x4_t __mask = vld1q_s32 ((const int32_t *) __mask_bits);
1477	const graphene_simd4f_t __a = (a), __b = (b);
1478	const graphene_simd2f_t __a_low = vget_low_f32 (__a);
1479	const graphene_simd2f_t __b_low = vget_low_f32 (__b);
1480	const graphene_simd4f_t __a_yzx = vcombine_f32 (vext_f32 (__a_low, vget_high_f32 (__a), `1`), __a_low);
1481	const graphene_simd4f_t __b_yzx = vcombine_f32 (vext_f32 (__b_low, vget_high_f32 (__b), `1`), __b_low);
1482	graphene_simd4f_t __s3 = graphene_simd4f_sub (graphene_simd4f_mul (__b_yzx, __a),
1483	graphene_simd4f_mul (__a_yzx, __b));
1484	graphene_simd2f_t __s3_low = vget_low_f32 (__s3);
1485	__s3 = vcombine_f32 (vext_f32 (__s3_low, vget_high_f32 (__s3), `1`), __s3_low);
1486	return vandq_s32 (__s3, __mask);
1487	}
1488
1489	# define graphene_simd4f_dot3(a,b) graphene_simd4f_splat (graphene_simd4f_dot3_scalar (a, b))
1490
1491	# define graphene_simd4f_dot3_scalar(a,b) _simd4f_dot3_scalar(a,b)
1492	static inline float
1493	_simd4f_dot3_scalar (const graphene_simd4f_t a,
1494	const graphene_simd4f_t b)
1495	{
1496	const graphene_simd4f_t __m = graphene_simd4f_mul (a, b);
1497	const graphene_simd2f_t __s1 = vpadd_f32 (vget_low_f32 (__m), vget_low_f32 (__m));
1498	return vget_lane_f32 (vadd_f32 (__s1, vget_high_f32 (__m)), `0`);
1499	}
1500
1501	# define graphene_simd4f_min(a,b) vminq_f32 ((a), (b))
1502
1503	# define graphene_simd4f_max(a,b) vmaxq_f32 (a, b)
1504
1505	# define graphene_simd4f_shuffle_wxyz(v) _simd4f_shuffle_wxyz(v)
1506	static inline graphene_simd4f_t
1507	_simd4f_shuffle_wxyz (const graphene_simd4f_t v)
1508	{
1509	graphene_simd4f_union_t __u = { (v) };
1510	return graphene_simd4f_init (__u.f[`3`], __u.f[`0`], __u.f[`1`], __u.f[`2`]);
1511	}
1512
1513	# define graphene_simd4f_shuffle_zwxy(v) _simd4f_shuffle_zwxy(v)
1514	static inline graphene_simd4f_t
1515	_simd4f_shuffle_zwxy (const graphene_simd4f_t v)
1516	{
1517	graphene_simd4f_union_t __u = { (v) };
1518	return graphene_simd4f_init (__u.f[`2`], __u.f[`3`], __u.f[`0`], __u.f[`1`]);
1519	}
1520
1521	# define graphene_simd4f_shuffle_yzwx(v) _simd4f_shuffle_yzwx(v)
1522	static inline graphene_simd4f_t
1523	_simd4f_shuffle_yzwx (const graphene_simd4f_t v)
1524	{
1525	graphene_simd4f_union_t __u = { (v) };
1526	return graphene_simd4f_init (__u.f[`1`], __u.f[`2`], __u.f[`3`], __u.f[`0`]);
1527	}
1528
1529	# define graphene_simd4f_zero_w(v) _simd4f_zero_w(v)
1530	static inline graphene_simd4f_t
1531	_simd4f_zero_w (const graphene_simd4f_t v)
1532	{
1533	graphene_simd4f_union_t __u = { (v) };
1534	return graphene_simd4f_init (__u.f[`0`], __u.f[`1`], __u.f[`2`], `0.f`);
1535	}
1536
1537	# define graphene_simd4f_zero_zw(v) _simd4f_zero_zw(v)
1538	static inline graphene_simd4f_t
1539	_simd4f_zero_zw (const graphene_simd4f_t v)
1540	{
1541	graphene_simd4f_union_t __u = { (v) };
1542	return graphene_simd4f_init (__u.f[`0`], __u.f[`1`], `0.f`, `0.f`);
1543	}
1544
1545	# define graphene_simd4f_merge_w(s,v) _simd4f_merge_w(s,v)
1546	static inline graphene_simd4f_t
1547	_simd4f_merge_w (const graphene_simd4f_t s,
1548	float v)
1549	{
1550	graphene_simd4f_union_t __u = { (s) };
1551	return graphene_simd4f_init (__u.f[`0`], __u.f[`1`], __u.f[`2`], (v));
1552	}
1553
1554	# define graphene_simd4f_merge_high(a,b) _simd4f_merge_high(a,b)
1555	static inline graphene_simd4f_t
1556	_simd4f_merge_high (const graphene_simd4f_t a,
1557	const graphene_simd4f_t b)
1558	{
1559	graphene_simd4f_union_t __u_a = { (a) };
1560	graphene_simd4f_union_t __u_b = { (b) };
1561	return graphene_simd4f_init (__u_a.f[`2`], __u_a.f[`3`], __u_b.f[`2`], __u_b.f[`3`]);
1562	}
1563
1564	# define graphene_simd4f_merge_low(a,b) _simd4f_merge_low(a,b)
1565	static inline graphene_simd4f_t
1566	_simd4f_merge_low (const graphene_simd4f_t a,
1567	const graphene_simd4f_t b)
1568	{
1569	graphene_simd4f_union_t __u_a = { (a) };
1570	graphene_simd4f_union_t __u_b = { (b) };
1571	return graphene_simd4f_init (__u_a.f[`0`], __u_a.f[`1`], __u_b.f[`0`], __u_b.f[`1`]);
1572	}
1573
1574
1575	# define graphene_simd4f_flip_sign_0101(s) _simd4f_flip_sign_0101(s)
1576	static inline graphene_simd4f_t
1577	_simd4f_flip_sign_0101 (const graphene_simd4f_t s)
1578	{
1579	const unsigned int __upnpn[`4`] = {
1580	`0x00000000`,
1581	`0x80000000`,
1582	`0x00000000`,
1583	`0x80000000`
1584	};
1585	const uint32x4_t __pnpn = vld1q_u32 (__upnpn);
1586	return vreinterpretq_f32_u32 (veorq_u32 (vreinterpretq_u32_f32 ((s)), __pnpn));
1587	}
1588
1589	# define graphene_simd4f_flip_sign_1010(s) _simd4f_flip_sign_1010(s)
1590	static inline graphene_simd4f_t
1591	_simd4f_flip_sign_1010 (const graphene_simd4f_t s)
1592	{
1593	const unsigned int __unpnp[`4`] = {
1594	`0x80000000`,
1595	`0x00000000`,
1596	`0x80000000`,
1597	`0x00000000`
1598	};
1599
1600	const uint32x4_t __npnp = vld1q_u32 (__unpnp);
1601	return vreinterpretq_f32_u32 (veorq_u32 (vreinterpretq_u32_f32 ((s)), __npnp));
1602	}
1603
1604	# define graphene_simd4f_cmp_eq(a,b) _simd4f_cmp_eq(a,b)
1605	static inline bool
1606	_simd4f_cmp_eq (const graphene_simd4f_t a,
1607	const graphene_simd4f_t b)
1608	{
1609	const uint32x4_t __mask = vceqq_f32 ((a), (b));
1610	return (vgetq_lane_u32 (__mask, `0`) != `0` &&
1611	vgetq_lane_u32 (__mask, `1`) != `0` &&
1612	vgetq_lane_u32 (__mask, `2`) != `0` &&
1613	vgetq_lane_u32 (__mask, `3`) != `0`);
1614	}
1615
1616	# define graphene_simd4f_cmp_neq(a,b) _simd4f_cmp_neq(a,b)
1617	static inline bool
1618	_simd4f_cmp_neq (const graphene_simd4f_t a,
1619	const graphene_simd4f_t b)
1620	{
1621	const uint32x4_t __mask = vceqq_f32 ((a), (b));
1622	return (vgetq_lane_u32 (__mask, `0`) == `0` \|\|
1623	vgetq_lane_u32 (__mask, `1`) == `0` \|\|
1624	vgetq_lane_u32 (__mask, `2`) == `0` \|\|
1625	vgetq_lane_u32 (__mask, `3`) == `0`);
1626	}
1627
1628	# define graphene_simd4f_cmp_lt(a,b) _simd4f_cmp_lt(a,b)
1629	static inline bool
1630	_simd4f_cmp_lt (const graphene_simd4f_t a,
1631	const graphene_simd4f_t b)
1632	{
1633	const uint32x4_t __mask = vcltq_f32 ((a), (b));
1634	return (vgetq_lane_u32 (__mask, `0`) != `0` &&
1635	vgetq_lane_u32 (__mask, `1`) != `0` &&
1636	vgetq_lane_u32 (__mask, `2`) != `0` &&
1637	vgetq_lane_u32 (__mask, `3`) != `0`);
1638	}
1639
1640	# define graphene_simd4f_cmp_le(a,b) _simd4f_cmp_le(a,b)
1641	static inline bool
1642	_simd4f_cmp_le (const graphene_simd4f_t a,
1643	const graphene_simd4f_t b)
1644	{
1645	const uint32x4_t __mask = vcleq_f32 ((a), (b));
1646	return (vgetq_lane_u32 (__mask, `0`) != `0` &&
1647	vgetq_lane_u32 (__mask, `1`) != `0` &&
1648	vgetq_lane_u32 (__mask, `2`) != `0` &&
1649	vgetq_lane_u32 (__mask, `3`) != `0`);
1650	}
1651
1652	# define graphene_simd4f_cmp_ge(a,b) _simd4f_cmp_ge(a,b)
1653	static inline bool
1654	_simd4f_cmp_ge (const graphene_simd4f_t a,
1655	const graphene_simd4f_t b)
1656	{
1657	const uint32x4_t __mask = vcgeq_f32 ((a), (b));
1658	return (vgetq_lane_u32 (__mask, `0`) != `0` &&
1659	vgetq_lane_u32 (__mask, `1`) != `0` &&
1660	vgetq_lane_u32 (__mask, `2`) != `0` &&
1661	vgetq_lane_u32 (__mask, `3`) != `0`);
1662	}
1663
1664	# define graphene_simd4f_cmp_gt(a,b) _simd4f_cmp_gt(a,b)
1665	static inline bool
1666	_simd4f_cmp_gt (const graphene_simd4f_t a,
1667	const graphene_simd4f_t b)
1668	{
1669	const uint32x4_t __mask = vcgtq_f32 ((a), (b));
1670	return (vgetq_lane_u32 (__mask, `0`) != `0` &&
1671	vgetq_lane_u32 (__mask, `1`) != `0` &&
1672	vgetq_lane_u32 (__mask, `2`) != `0` &&
1673	vgetq_lane_u32 (__mask, `3`) != `0`);
1674	}
1675
1676	# define graphene_simd4f_neg(s) _simd4f_neg(s)
1677	static inline graphene_simd4f_t
1678	_simd4f_neg (const graphene_simd4f_t s)
1679	{
1680	const unsigned int __umask[`4`] = {
1681	`0x80000000`,
1682	`0x80000000`,
1683	`0x80000000`,
1684	`0x80000000`
1685	};
1686	const uint32x4_t __mask = vld1q_u32 (__umask);
1687	return vreinterpretq_f32_u32 (veorq_u32 (vreinterpretq_u32_f32 ((s)), __mask));
1688	}
1689
1690	#else /* ARM NEON intrinsics-not GCC or Visual Studio */
1691
1692	# error "Need GCC-compatible or Visual Studio compiler for ARM NEON extensions."
1693
/* Use static inline to inline all these functions */
1695
1696	# endif /* !__GNUC__ && !_MSC_VER */
1697
/* Lane accessors shared by every compiler: x, y, z and w are lanes
 * 0, 1, 2 and 3 of graphene_simd4f_get().
 */
# define graphene_simd4f_get_x(s)       graphene_simd4f_get ((s), 0)
# define graphene_simd4f_get_y(s)       graphene_simd4f_get ((s), 1)
# define graphene_simd4f_get_z(s)       graphene_simd4f_get ((s), 2)
# define graphene_simd4f_get_w(s)       graphene_simd4f_get ((s), 3)
1703
1704	#elif defined(__GI_SCANNER__) \|\| defined(GRAPHENE_USE_SCALAR)
1705
/* Fallback implementation using scalar types.
 *
 * Every macro expands to a call of the function with the same name (a
 * macro is not re-expanded inside its own expansion); the wrappers only
 * parenthesize the arguments and cast the array arguments to float
 * pointers.
 */

/* Construction and extraction */
#define graphene_simd4f_init(x,y,z,w)           (graphene_simd4f_init ((x), (y), (z), (w)))
#define graphene_simd4f_init_zero()             (graphene_simd4f_init_zero ())
#define graphene_simd4f_init_4f(v)              (graphene_simd4f_init_4f ((const float *) (v)))
#define graphene_simd4f_init_3f(v)              (graphene_simd4f_init_3f ((const float *) (v)))
#define graphene_simd4f_init_2f(v)              (graphene_simd4f_init_2f ((const float *) (v)))
#define graphene_simd4f_dup_4f(s,v)             (graphene_simd4f_dup_4f ((s), (float *) (v)))
#define graphene_simd4f_dup_3f(s,v)             (graphene_simd4f_dup_3f ((s), (float *) (v)))
#define graphene_simd4f_dup_2f(s,v)             (graphene_simd4f_dup_2f ((s), (float *) (v)))
#define graphene_simd4f_get(s,i)                (graphene_simd4f_get ((s), (i)))
#define graphene_simd4f_get_x(s)                (graphene_simd4f_get_x ((s)))
#define graphene_simd4f_get_y(s)                (graphene_simd4f_get_y ((s)))
#define graphene_simd4f_get_z(s)                (graphene_simd4f_get_z ((s)))
#define graphene_simd4f_get_w(s)                (graphene_simd4f_get_w ((s)))
#define graphene_simd4f_splat(v)                (graphene_simd4f_splat ((v)))
#define graphene_simd4f_splat_x(s)              (graphene_simd4f_splat_x ((s)))
#define graphene_simd4f_splat_y(s)              (graphene_simd4f_splat_y ((s)))
#define graphene_simd4f_splat_z(s)              (graphene_simd4f_splat_z ((s)))
#define graphene_simd4f_splat_w(s)              (graphene_simd4f_splat_w ((s)))

/* Arithmetic */
#define graphene_simd4f_add(a,b)                (graphene_simd4f_add ((a), (b)))
#define graphene_simd4f_sub(a,b)                (graphene_simd4f_sub ((a), (b)))
#define graphene_simd4f_mul(a,b)                (graphene_simd4f_mul ((a), (b)))
#define graphene_simd4f_div(a,b)                (graphene_simd4f_div ((a), (b)))
#define graphene_simd4f_sqrt(s)                 (graphene_simd4f_sqrt ((s)))
#define graphene_simd4f_rsqrt(s)                (graphene_simd4f_rsqrt ((s)))
#define graphene_simd4f_reciprocal(s)           (graphene_simd4f_reciprocal ((s)))
#define graphene_simd4f_cross3(a,b)             (graphene_simd4f_cross3 ((a), (b)))
#define graphene_simd4f_dot3(a,b)               (graphene_simd4f_dot3 ((a), (b)))
#define graphene_simd4f_dot3_scalar(a,b)        (graphene_simd4f_dot3_scalar ((a), (b)))
#define graphene_simd4f_min(a,b)                (graphene_simd4f_min ((a), (b)))
#define graphene_simd4f_max(a,b)                (graphene_simd4f_max ((a), (b)))

/* Lane shuffling, sign flipping and merging */
#define graphene_simd4f_shuffle_wxyz(s)         (graphene_simd4f_shuffle_wxyz ((s)))
#define graphene_simd4f_shuffle_zwxy(s)         (graphene_simd4f_shuffle_zwxy ((s)))
#define graphene_simd4f_shuffle_yzwx(s)         (graphene_simd4f_shuffle_yzwx ((s)))
#define graphene_simd4f_flip_sign_0101(s)       (graphene_simd4f_flip_sign_0101 ((s)))
#define graphene_simd4f_flip_sign_1010(s)       (graphene_simd4f_flip_sign_1010 ((s)))
#define graphene_simd4f_zero_w(v)               (graphene_simd4f_zero_w ((v)))
#define graphene_simd4f_zero_zw(v)              (graphene_simd4f_zero_zw ((v)))
#define graphene_simd4f_merge_w(s,v)            (graphene_simd4f_merge_w ((s), (v)))
#define graphene_simd4f_merge_high(a,b)         (graphene_simd4f_merge_high ((a), (b)))
#define graphene_simd4f_merge_low(a,b)          (graphene_simd4f_merge_low ((a), (b)))

/* Comparisons and negation */
#define graphene_simd4f_cmp_eq(a,b)             (graphene_simd4f_cmp_eq ((a), (b)))
#define graphene_simd4f_cmp_neq(a,b)            (graphene_simd4f_cmp_neq ((a), (b)))
#define graphene_simd4f_cmp_lt(a,b)             (graphene_simd4f_cmp_lt ((a), (b)))
#define graphene_simd4f_cmp_le(a,b)             (graphene_simd4f_cmp_le ((a), (b)))
#define graphene_simd4f_cmp_ge(a,b)             (graphene_simd4f_cmp_ge ((a), (b)))
#define graphene_simd4f_cmp_gt(a,b)             (graphene_simd4f_cmp_gt ((a), (b)))
#define graphene_simd4f_neg(s)                  (graphene_simd4f_neg ((s)))
1802
1803	#else
1804	# error "Unsupported simd4f implementation."
1805	#endif
1806
/* Generic operations, inlined */
1808
1809	/**
1810	* graphene_simd4f_madd:
1811	* @m1: a #graphene_simd4f_t
1812	* @m2: a #graphene_simd4f_t
1813	* @a: a #graphene_simd4f_t
1814	*
1815	* Adds @a to the product of @m1 and @m2.
1816	*
1817	* Returns: the result vector
1818	*
1819	* Since: 1.0
1820	*/
1821	static inline graphene_simd4f_t
1822	graphene_simd4f_madd (const graphene_simd4f_t m1,
1823	const graphene_simd4f_t m2,
1824	const graphene_simd4f_t a)
1825	{
1826	return graphene_simd4f_add (graphene_simd4f_mul (m1, m2), a);
1827	}
1828
1829	/**
1830	* graphene_simd4f_sum:
1831	* @v: a #graphene_simd4f_t
1832	*
1833	* Sums all components of the given vector.
1834	*
1835	* Returns: a vector with all components set to be the
1836	* sum of the passed #graphene_simd4f_t
1837	*
1838	* Since: 1.0
1839	*/
1840	static inline graphene_simd4f_t
1841	graphene_simd4f_sum (const graphene_simd4f_t v)
1842	{
1843	const graphene_simd4f_t x = graphene_simd4f_splat_x (v);
1844	const graphene_simd4f_t y = graphene_simd4f_splat_y (v);
1845	const graphene_simd4f_t z = graphene_simd4f_splat_z (v);
1846	const graphene_simd4f_t w = graphene_simd4f_splat_w (v);
1847
1848	return graphene_simd4f_add (graphene_simd4f_add (x, y),
1849	graphene_simd4f_add (z, w));
1850	}
1851
1852	/**
1853	* graphene_simd4f_sum_scalar:
1854	* @v: a #graphene_simd4f_t
1855	*
1856	* Sums all the components of the given vector.
1857	*
1858	* Returns: a scalar value with the sum of the components
1859	* of the given #graphene_simd4f_t
1860	*
1861	* Since: 1.0
1862	*/
1863	static inline float
1864	graphene_simd4f_sum_scalar (const graphene_simd4f_t v)
1865	{
1866	return graphene_simd4f_get_x (graphene_simd4f_sum (v));
1867	}
1868
1869	/**
1870	* graphene_simd4f_dot4:
1871	* @a: a #graphene_simd4f_t
1872	* @b: a #graphene_simd4f_t
1873	*
1874	* Computes the dot product of all the components of the two
1875	* given #graphene_simd4f_t.
1876	*
1877	* Returns: a vector whose components are all set to be the
1878	* dot product of the components of the two operands
1879	*
1880	* Since: 1.0
1881	*/
1882	static inline graphene_simd4f_t
1883	graphene_simd4f_dot4 (const graphene_simd4f_t a,
1884	const graphene_simd4f_t b)
1885	{
1886	return graphene_simd4f_sum (graphene_simd4f_mul (a, b));
1887	}
1888
1889	/**
1890	* graphene_simd4f_dot2:
1891	* @a: a #graphene_simd4f_t
1892	* @b: a #graphene_simd4f_t
1893	*
1894	* Computes the dot product of the first two components of the
1895	* two given #graphene_simd4f_t.
1896	*
1897	* Returns: a vector whose components are all set to the
1898	* dot product of the components of the two operands
1899	*
1900	* Since: 1.0
1901	*/
1902	static inline graphene_simd4f_t
1903	graphene_simd4f_dot2 (const graphene_simd4f_t a,
1904	const graphene_simd4f_t b)
1905	{
1906	const graphene_simd4f_t m = graphene_simd4f_mul (a, b);
1907	const graphene_simd4f_t x = graphene_simd4f_splat_x (m);
1908	const graphene_simd4f_t y = graphene_simd4f_splat_y (m);
1909
1910	return graphene_simd4f_add (x, y);
1911	}
1912
1913	/**
1914	* graphene_simd4f_length4:
1915	* @v: a #graphene_simd4f_t
1916	*
1917	* Computes the length of the given #graphene_simd4f_t vector,
1918	* using all four of its components.
1919	*
1920	* Returns: the length vector
1921	*
1922	* Since: 1.0
1923	*/
1924	static inline graphene_simd4f_t
1925	graphene_simd4f_length4 (const graphene_simd4f_t v)
1926	{
1927	return graphene_simd4f_sqrt (graphene_simd4f_dot4 (v, v));
1928	}
1929
1930	/**
1931	* graphene_simd4f_length3:
1932	* @v: a #graphene_simd4f_t
1933	*
1934	* Computes the length of the given #graphene_simd4f_t vector,
1935	* using the first three of its components.
1936	*
1937	* Returns: the length vector
1938	*
1939	* Since: 1.0
1940	*/
1941	static inline graphene_simd4f_t
1942	graphene_simd4f_length3 (const graphene_simd4f_t v)
1943	{
1944	return graphene_simd4f_sqrt (graphene_simd4f_dot3 (v, v));
1945	}
1946
1947	/**
1948	* graphene_simd4f_length2:
1949	* @v: a #graphene_simd4f_t
1950	*
1951	* Computes the length of the given #graphene_simd4f_t vector,
1952	* using the first two of its components.
1953	*
1954	* Returns: the length vector
1955	*
1956	* Since: 1.0
1957	*/
1958	static inline graphene_simd4f_t
1959	graphene_simd4f_length2 (const graphene_simd4f_t v)
1960	{
1961	return graphene_simd4f_sqrt (graphene_simd4f_dot2 (v, v));
1962	}
1963
1964	/**
1965	* graphene_simd4f_normalize4:
1966	* @v: a #graphene_simd4f_t
1967	*
1968	* Computes the normalization of the given #graphene_simd4f_t vector,
1969	* using all of its components.
1970	*
1971	* Returns: the normalized vector
1972	*
1973	* Since: 1.0
1974	*/
1975	static inline graphene_simd4f_t
1976	graphene_simd4f_normalize4 (const graphene_simd4f_t v)
1977	{
1978	graphene_simd4f_t invlen = graphene_simd4f_rsqrt (graphene_simd4f_dot4 (v, v));
1979	return graphene_simd4f_mul (v, invlen);
1980	}
1981
1982	/**
1983	* graphene_simd4f_normalize3:
1984	* @v: a #graphene_simd4f_t
1985	*
1986	* Computes the normalization of the given #graphene_simd4f_t vector,
1987	* using the first three of its components.
1988	*
1989	* Returns: the normalized vector
1990	*
1991	* Since: 1.0
1992	*/
1993	static inline graphene_simd4f_t
1994	graphene_simd4f_normalize3 (const graphene_simd4f_t v)
1995	{
1996	graphene_simd4f_t invlen = graphene_simd4f_rsqrt (graphene_simd4f_dot3 (v, v));
1997	return graphene_simd4f_mul (v, invlen);
1998	}
1999
2000	/**
2001	* graphene_simd4f_normalize2:
2002	* @v: a #graphene_simd4f_t
2003	*
2004	* Computes the normalization of the given #graphene_simd4f_t vector,
2005	* using the first two of its components.
2006	*
2007	* Returns: the normalized vector
2008	*
2009	* Since: 1.0
2010	*/
2011	static inline graphene_simd4f_t
2012	graphene_simd4f_normalize2 (const graphene_simd4f_t v)
2013	{
2014	graphene_simd4f_t invlen = graphene_simd4f_rsqrt (graphene_simd4f_dot2 (v, v));
2015	return graphene_simd4f_mul (v, invlen);
2016	}
2017
2018	/**
2019	* graphene_simd4f_is_zero4:
2020	* @v: a #graphene_simd4f_t
2021	*
2022	* Checks whether the given #graphene_simd4f_t has all its components
2023	* set to 0.
2024	*
2025	* Returns: `true` if all the vector components are zero
2026	*
2027	* Since: 1.0
2028	*/
2029	static inline bool
2030	graphene_simd4f_is_zero4 (const graphene_simd4f_t v)
2031	{
2032	graphene_simd4f_t zero = graphene_simd4f_init_zero ();
2033	return graphene_simd4f_cmp_eq (v, zero);
2034	}
2035
2036	/**
2037	* graphene_simd4f_is_zero3:
2038	* @v: a #graphene_simd4f_t
2039	*
2040	* Checks whether the given #graphene_simd4f_t has the first three of
2041	* its components set to 0.
2042	*
2043	* Returns: `true` if the vector's components are zero
2044	*
2045	* Since: 1.0
2046	*/
2047	static inline bool
2048	graphene_simd4f_is_zero3 (const graphene_simd4f_t v)
2049	{
2050	return fabsf (graphene_simd4f_get_x (v)) <= FLT_EPSILON &&
2051	fabsf (graphene_simd4f_get_y (v)) <= FLT_EPSILON &&
2052	fabsf (graphene_simd4f_get_z (v)) <= FLT_EPSILON;
2053	}
2054
2055	/**
2056	* graphene_simd4f_is_zero2:
2057	* @v: a #graphene_simd4f_t
2058	*
2059	* Checks whether the given #graphene_simd4f_t has the first two of
2060	* its components set to 0.
2061	*
2062	* Returns: `true` if the vector's components are zero
2063	*
2064	* Since: 1.0
2065	*/
2066	static inline bool
2067	graphene_simd4f_is_zero2 (const graphene_simd4f_t v)
2068	{
2069	return fabsf (graphene_simd4f_get_x (v)) <= FLT_EPSILON &&
2070	fabsf (graphene_simd4f_get_y (v)) <= FLT_EPSILON;
2071	}
2072
2073	/**
2074	* graphene_simd4f_interpolate:
2075	* @a: a #graphene_simd4f_t
2076	* @b: a #graphene_simd4f_t
2077	* @f: the interpolation factor
2078	*
2079	* Linearly interpolates all components of the two given
2080	* #graphene_simd4f_t vectors using the given factor @f.
2081	*
2082	* Returns: the intrerpolated vector
2083	*
2084	* Since: 1.0
2085	*/
2086	static inline graphene_simd4f_t
2087	graphene_simd4f_interpolate (const graphene_simd4f_t a,
2088	const graphene_simd4f_t b,
2089	float f)
2090	{
2091	const graphene_simd4f_t one_minus_f = graphene_simd4f_sub (graphene_simd4f_splat (`1.f`),
2092	graphene_simd4f_splat (f));
2093
2094	return graphene_simd4f_add (graphene_simd4f_mul (one_minus_f, a),
2095	graphene_simd4f_mul (graphene_simd4f_splat (f), b));
2096	}
2097
2098	/**
2099	* graphene_simd4f_clamp:
2100	* @v: a #graphene_simd4f_t
2101	* @min: the lower boundary
2102	* @max: the upper boundary
2103	*
2104	* Ensures that all components of the vector @v are within
2105	* the components of the @lower and @upper boundaries.
2106	*
2107	* Returns: the clamped vector
2108	*
2109	* Since: 1.2
2110	*/
2111	static inline graphene_simd4f_t
2112	graphene_simd4f_clamp (const graphene_simd4f_t v,
2113	const graphene_simd4f_t min,
2114	const graphene_simd4f_t max)
2115	{
2116	const graphene_simd4f_t tmp = graphene_simd4f_max (min, v);
2117
2118	return graphene_simd4f_min (tmp, max);
2119	}
2120
2121	/**
2122	* graphene_simd4f_clamp_scalar:
2123	* @v: a #graphene_simd4f_t
2124	* @min: the lower boundary
2125	* @max: the upper boundary
2126	*
2127	* Ensures that all components of the vector @v are within
2128	* the @lower and @upper boundary scalar values.
2129	*
2130	* Returns: the clamped vector
2131	*
2132	* Since: 1.2
2133	*/
2134	static inline graphene_simd4f_t
2135	graphene_simd4f_clamp_scalar (const graphene_simd4f_t v,
2136	float min,
2137	float max)
2138	{
2139	return graphene_simd4f_clamp (v,
2140	graphene_simd4f_splat (min),
2141	graphene_simd4f_splat (max));
2142	}
2143
2144	/**
2145	* graphene_simd4f_min_val:
2146	* @v: a #graphene_simd4f_t
2147	*
2148	* Computes the minimum value of all the channels in the given vector.
2149	*
2150	* Returns: a vector whose components are all set to the
2151	* minimum value in the original vector
2152	*
2153	* Since: 1.4
2154	*/
2155	static inline graphene_simd4f_t
2156	graphene_simd4f_min_val (const graphene_simd4f_t v)
2157	{
2158	graphene_simd4f_t s = v;
2159
2160	s = graphene_simd4f_min (s, graphene_simd4f_shuffle_wxyz (s));
2161	s = graphene_simd4f_min (s, graphene_simd4f_shuffle_zwxy (s));
2162
2163	return s;
2164	}
2165
2166	/**
2167	* graphene_simd4f_max_val:
2168	* @v: a #graphene_simd4f_t
2169	*
2170	* Computes the maximum value of all the channels in the given vector.
2171	*
2172	* Returns: a vector whose components are all set to the
2173	* maximum value in the original vector
2174	*
2175	* Since: 1.4
2176	*/
2177	static inline graphene_simd4f_t
2178	graphene_simd4f_max_val (const graphene_simd4f_t v)
2179	{
2180	graphene_simd4f_t s = v;
2181
2182	s = graphene_simd4f_max (s, graphene_simd4f_shuffle_wxyz (s));
2183	s = graphene_simd4f_max (s, graphene_simd4f_shuffle_zwxy (s));
2184
2185	return s;
2186	}
2187
2188	GRAPHENE_END_DECLS
2189

source code of gtk/subprojects/graphene/include/graphene-simd4f.h