/*===---- smmintrin.h - Implementation of SSE4 intrinsics on PowerPC -------===
 *
 * Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
 * See https://llvm.org/LICENSE.txt for license information.
 * SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
 *
 *===-----------------------------------------------------------------------===
 */

/* Implemented from the specification included in the Intel C++ Compiler
   User Guide and Reference, version 9.0.

   NOTE: This is NOT a complete implementation of the SSE4 intrinsics! */

#ifndef NO_WARN_X86_INTRINSICS
/* This header is distributed to simplify porting x86_64 code that
   makes explicit use of Intel intrinsics to powerpc64/powerpc64le.

   It is the user's responsibility to determine if the results are
   acceptable and make additional changes as necessary.

   Note that much code that uses Intel intrinsics can be rewritten in
   standard C or GNU C extensions, which are more portable and better
   optimized across multiple targets. */
#error                                                                         \
    "Please read comment above. Use -DNO_WARN_X86_INTRINSICS to disable this error."
#endif

#ifndef SMMINTRIN_H_
#define SMMINTRIN_H_

#if defined(__powerpc64__) &&                                                  \
    (defined(__linux__) || defined(__FreeBSD__) || defined(_AIX))

#include <altivec.h>
#include <tmmintrin.h>
/* Rounding mode macros. */
#define _MM_FROUND_TO_NEAREST_INT 0x00
#define _MM_FROUND_TO_ZERO 0x01
#define _MM_FROUND_TO_POS_INF 0x02
#define _MM_FROUND_TO_NEG_INF 0x03
#define _MM_FROUND_CUR_DIRECTION 0x04

#define _MM_FROUND_RAISE_EXC 0x00
#define _MM_FROUND_NO_EXC 0x08

#define _MM_FROUND_NINT (_MM_FROUND_TO_NEAREST_INT | _MM_FROUND_RAISE_EXC)
#define _MM_FROUND_FLOOR (_MM_FROUND_TO_NEG_INF | _MM_FROUND_RAISE_EXC)
#define _MM_FROUND_CEIL (_MM_FROUND_TO_POS_INF | _MM_FROUND_RAISE_EXC)
#define _MM_FROUND_TRUNC (_MM_FROUND_TO_ZERO | _MM_FROUND_RAISE_EXC)
#define _MM_FROUND_RINT (_MM_FROUND_CUR_DIRECTION | _MM_FROUND_RAISE_EXC)
#define _MM_FROUND_NEARBYINT (_MM_FROUND_CUR_DIRECTION | _MM_FROUND_NO_EXC)
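
/* Usage sketch (illustrative only, not part of the header): a rounding
   direction may be OR-ed with _MM_FROUND_NO_EXC to suppress the inexact
   exception, e.g.

     __m128d __t = _mm_round_pd(__x, _MM_FROUND_TO_ZERO | _MM_FROUND_NO_EXC);

   truncates both lanes of a caller-supplied __x without raising
   exceptions, unlike the _MM_FROUND_TRUNC shorthand, which permits the
   inexact exception. */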

extern __inline __m128d
    __attribute__((__gnu_inline__, __always_inline__, __artificial__))
    _mm_round_pd(__m128d __A, int __rounding) {
  __v2df __r;
  union {
    double __fr;
    long long __fpscr;
  } __enables_save, __fpscr_save;

  if (__rounding & _MM_FROUND_NO_EXC) {
    /* Save enabled exceptions, disable all exceptions,
       and preserve the rounding mode. */
#ifdef _ARCH_PWR9
    __asm__("mffsce %0" : "=f"(__fpscr_save.__fr));
    __enables_save.__fpscr = __fpscr_save.__fpscr & 0xf8;
#else
    __fpscr_save.__fr = __builtin_ppc_mffs();
    __enables_save.__fpscr = __fpscr_save.__fpscr & 0xf8;
    __fpscr_save.__fpscr &= ~0xf8;
    __builtin_ppc_mtfsf(0b00000011, __fpscr_save.__fr);
#endif
    /* Insert an artificial "read/write" reference to the variable
       read below, to ensure the compiler does not schedule
       a read/use of the variable before the FPSCR is modified, above.
       This can be removed if and when GCC PR102783 is fixed.
     */
    __asm__("" : "+wa"(__A));
  }

  switch (__rounding) {
  case _MM_FROUND_TO_NEAREST_INT:
#ifdef _ARCH_PWR9
    __fpscr_save.__fr = __builtin_ppc_mffsl();
#else
    __fpscr_save.__fr = __builtin_ppc_mffs();
    __fpscr_save.__fpscr &= 0x70007f0ffL;
#endif
    __attribute__((fallthrough));
  case _MM_FROUND_TO_NEAREST_INT | _MM_FROUND_NO_EXC:
    __builtin_ppc_set_fpscr_rn(0b00);
    /* Insert an artificial "read/write" reference to the variable
       read below, to ensure the compiler does not schedule
       a read/use of the variable before the FPSCR is modified, above.
       This can be removed if and when GCC PR102783 is fixed.
     */
    __asm__("" : "+wa"(__A));

    __r = vec_rint((__v2df)__A);

    /* Insert an artificial "read" reference to the variable written
       above, to ensure the compiler does not schedule the computation
       of the value after the manipulation of the FPSCR, below.
       This can be removed if and when GCC PR102783 is fixed.
     */
    __asm__("" : : "wa"(__r));
    __builtin_ppc_set_fpscr_rn(__fpscr_save.__fpscr);
    break;
  case _MM_FROUND_TO_NEG_INF:
  case _MM_FROUND_TO_NEG_INF | _MM_FROUND_NO_EXC:
    __r = vec_floor((__v2df)__A);
    break;
  case _MM_FROUND_TO_POS_INF:
  case _MM_FROUND_TO_POS_INF | _MM_FROUND_NO_EXC:
    __r = vec_ceil((__v2df)__A);
    break;
  case _MM_FROUND_TO_ZERO:
  case _MM_FROUND_TO_ZERO | _MM_FROUND_NO_EXC:
    __r = vec_trunc((__v2df)__A);
    break;
  case _MM_FROUND_CUR_DIRECTION:
    __r = vec_rint((__v2df)__A);
    break;
  }
  if (__rounding & _MM_FROUND_NO_EXC) {
    /* Insert an artificial "read" reference to the variable written
       above, to ensure the compiler does not schedule the computation
       of the value after the manipulation of the FPSCR, below.
       This can be removed if and when GCC PR102783 is fixed.
     */
    __asm__("" : : "wa"(__r));
    /* Restore enabled exceptions. */
#ifdef _ARCH_PWR9
    __fpscr_save.__fr = __builtin_ppc_mffsl();
#else
    __fpscr_save.__fr = __builtin_ppc_mffs();
    __fpscr_save.__fpscr &= 0x70007f0ffL;
#endif
    __fpscr_save.__fpscr |= __enables_save.__fpscr;
    __builtin_ppc_mtfsf(0b00000011, __fpscr_save.__fr);
  }
  return (__m128d)__r;
}
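
/* Usage sketch for _mm_round_pd (illustrative only; __vals is a
   hypothetical caller-supplied vector):

     __m128d __vals = _mm_set_pd(2.5, -1.5);
     __m128d __near = _mm_round_pd(__vals, _MM_FROUND_TO_NEAREST_INT);

   Round-to-nearest-even yields {-2.0, 2.0}, since both halfway cases
   round to the even neighbor. */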

extern __inline __m128d
    __attribute__((__gnu_inline__, __always_inline__, __artificial__))
    _mm_round_sd(__m128d __A, __m128d __B, int __rounding) {
  __B = _mm_round_pd(__B, __rounding);
  __v2df __r = {((__v2df)__B)[0], ((__v2df)__A)[1]};
  return (__m128d)__r;
}

extern __inline __m128
    __attribute__((__gnu_inline__, __always_inline__, __artificial__))
    _mm_round_ps(__m128 __A, int __rounding) {
  __v4sf __r;
  union {
    double __fr;
    long long __fpscr;
  } __enables_save, __fpscr_save;

  if (__rounding & _MM_FROUND_NO_EXC) {
    /* Save enabled exceptions, disable all exceptions,
       and preserve the rounding mode. */
#ifdef _ARCH_PWR9
    __asm__("mffsce %0" : "=f"(__fpscr_save.__fr));
    __enables_save.__fpscr = __fpscr_save.__fpscr & 0xf8;
#else
    __fpscr_save.__fr = __builtin_ppc_mffs();
    __enables_save.__fpscr = __fpscr_save.__fpscr & 0xf8;
    __fpscr_save.__fpscr &= ~0xf8;
    __builtin_ppc_mtfsf(0b00000011, __fpscr_save.__fr);
#endif
    /* Insert an artificial "read/write" reference to the variable
       read below, to ensure the compiler does not schedule
       a read/use of the variable before the FPSCR is modified, above.
       This can be removed if and when GCC PR102783 is fixed.
     */
    __asm__("" : "+wa"(__A));
  }

  switch (__rounding) {
  case _MM_FROUND_TO_NEAREST_INT:
#ifdef _ARCH_PWR9
    __fpscr_save.__fr = __builtin_ppc_mffsl();
#else
    __fpscr_save.__fr = __builtin_ppc_mffs();
    __fpscr_save.__fpscr &= 0x70007f0ffL;
#endif
    __attribute__((fallthrough));
  case _MM_FROUND_TO_NEAREST_INT | _MM_FROUND_NO_EXC:
    __builtin_ppc_set_fpscr_rn(0b00);
    /* Insert an artificial "read/write" reference to the variable
       read below, to ensure the compiler does not schedule
       a read/use of the variable before the FPSCR is modified, above.
       This can be removed if and when GCC PR102783 is fixed.
     */
    __asm__("" : "+wa"(__A));

    __r = vec_rint((__v4sf)__A);

    /* Insert an artificial "read" reference to the variable written
       above, to ensure the compiler does not schedule the computation
       of the value after the manipulation of the FPSCR, below.
       This can be removed if and when GCC PR102783 is fixed.
     */
    __asm__("" : : "wa"(__r));
    __builtin_ppc_set_fpscr_rn(__fpscr_save.__fpscr);
    break;
  case _MM_FROUND_TO_NEG_INF:
  case _MM_FROUND_TO_NEG_INF | _MM_FROUND_NO_EXC:
    __r = vec_floor((__v4sf)__A);
    break;
  case _MM_FROUND_TO_POS_INF:
  case _MM_FROUND_TO_POS_INF | _MM_FROUND_NO_EXC:
    __r = vec_ceil((__v4sf)__A);
    break;
  case _MM_FROUND_TO_ZERO:
  case _MM_FROUND_TO_ZERO | _MM_FROUND_NO_EXC:
    __r = vec_trunc((__v4sf)__A);
    break;
  case _MM_FROUND_CUR_DIRECTION:
    __r = vec_rint((__v4sf)__A);
    break;
  }
  if (__rounding & _MM_FROUND_NO_EXC) {
    /* Insert an artificial "read" reference to the variable written
       above, to ensure the compiler does not schedule the computation
       of the value after the manipulation of the FPSCR, below.
       This can be removed if and when GCC PR102783 is fixed.
     */
    __asm__("" : : "wa"(__r));
    /* Restore enabled exceptions. */
#ifdef _ARCH_PWR9
    __fpscr_save.__fr = __builtin_ppc_mffsl();
#else
    __fpscr_save.__fr = __builtin_ppc_mffs();
    __fpscr_save.__fpscr &= 0x70007f0ffL;
#endif
    __fpscr_save.__fpscr |= __enables_save.__fpscr;
    __builtin_ppc_mtfsf(0b00000011, __fpscr_save.__fr);
  }
  return (__m128)__r;
}

extern __inline __m128
    __attribute__((__gnu_inline__, __always_inline__, __artificial__))
    _mm_round_ss(__m128 __A, __m128 __B, int __rounding) {
  __B = _mm_round_ps(__B, __rounding);
  __v4sf __r = (__v4sf)__A;
  __r[0] = ((__v4sf)__B)[0];
  return (__m128)__r;
}

#define _mm_ceil_pd(V) _mm_round_pd((V), _MM_FROUND_CEIL)
#define _mm_ceil_sd(D, V) _mm_round_sd((D), (V), _MM_FROUND_CEIL)

#define _mm_floor_pd(V) _mm_round_pd((V), _MM_FROUND_FLOOR)
#define _mm_floor_sd(D, V) _mm_round_sd((D), (V), _MM_FROUND_FLOOR)

#define _mm_ceil_ps(V) _mm_round_ps((V), _MM_FROUND_CEIL)
#define _mm_ceil_ss(D, V) _mm_round_ss((D), (V), _MM_FROUND_CEIL)

#define _mm_floor_ps(V) _mm_round_ps((V), _MM_FROUND_FLOOR)
#define _mm_floor_ss(D, V) _mm_round_ss((D), (V), _MM_FROUND_FLOOR)
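
/* The macros above are plain shorthands (illustrative example):
   _mm_floor_ps(__x) expands to _mm_round_ps((__x), _MM_FROUND_FLOOR),
   i.e. round toward negative infinity in all four lanes. */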

extern __inline __m128i
    __attribute__((__gnu_inline__, __always_inline__, __artificial__))
    _mm_insert_epi8(__m128i const __A, int const __D, int const __N) {
  __v16qi __result = (__v16qi)__A;

  __result[__N & 0xf] = __D;

  return (__m128i)__result;
}

extern __inline __m128i
    __attribute__((__gnu_inline__, __always_inline__, __artificial__))
    _mm_insert_epi32(__m128i const __A, int const __D, int const __N) {
  __v4si __result = (__v4si)__A;

  __result[__N & 3] = __D;

  return (__m128i)__result;
}

extern __inline __m128i
    __attribute__((__gnu_inline__, __always_inline__, __artificial__))
    _mm_insert_epi64(__m128i const __A, long long const __D, int const __N) {
  __v2di __result = (__v2di)__A;

  __result[__N & 1] = __D;

  return (__m128i)__result;
}

extern __inline int
    __attribute__((__gnu_inline__, __always_inline__, __artificial__))
    _mm_extract_epi8(__m128i __X, const int __N) {
  return (unsigned char)((__v16qi)__X)[__N & 15];
}

extern __inline int
    __attribute__((__gnu_inline__, __always_inline__, __artificial__))
    _mm_extract_epi32(__m128i __X, const int __N) {
  return ((__v4si)__X)[__N & 3];
}

extern __inline long long
    __attribute__((__gnu_inline__, __always_inline__, __artificial__))
    _mm_extract_epi64(__m128i __X, const int __N) {
  return ((__v2di)__X)[__N & 1];
}

extern __inline int
    __attribute__((__gnu_inline__, __always_inline__, __artificial__))
    _mm_extract_ps(__m128 __X, const int __N) {
  return ((__v4si)__X)[__N & 3];
}
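
/* Example (illustrative only): the element index is masked to the valid
   range rather than validated, so _mm_extract_epi32(__v, 5) reads lane
   5 & 3 == 1, and _mm_insert_epi8(__v, 0x7f, 17) writes byte
   17 & 0xf == 1. */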

#ifdef _ARCH_PWR8
extern __inline __m128i
    __attribute__((__gnu_inline__, __always_inline__, __artificial__))
    _mm_blend_epi16(__m128i __A, __m128i __B, const int __imm8) {
  __v16qu __charmask = vec_splats((unsigned char)__imm8);
  __charmask = vec_gb(__charmask);
  __v8hu __shortmask = (__v8hu)vec_unpackh((__v16qi)__charmask);
#ifdef __BIG_ENDIAN__
  __shortmask = vec_reve(__shortmask);
#endif
  return (__m128i)vec_sel((__v8hu)__A, (__v8hu)__B, __shortmask);
}
#endif
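
/* Example (illustrative only): each set bit of the 8-bit immediate
   selects the corresponding 16-bit lane from __B, so
   _mm_blend_epi16(__a, __b, 0x0f) takes halfwords 0-3 from __b and
   halfwords 4-7 from __a. */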

extern __inline __m128i
    __attribute__((__gnu_inline__, __always_inline__, __artificial__))
    _mm_blendv_epi8(__m128i __A, __m128i __B, __m128i __mask) {
#ifdef _ARCH_PWR10
  return (__m128i)vec_blendv((__v16qi)__A, (__v16qi)__B, (__v16qu)__mask);
#else
  const __v16qu __seven = vec_splats((unsigned char)0x07);
  __v16qu __lmask = vec_sra((__v16qu)__mask, __seven);
  return (__m128i)vec_sel((__v16qi)__A, (__v16qi)__B, __lmask);
#endif
}

extern __inline __m128
    __attribute__((__gnu_inline__, __always_inline__, __artificial__))
    _mm_blend_ps(__m128 __A, __m128 __B, const int __imm8) {
  __v16qu __pcv[] = {
      {0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15},
      {16, 17, 18, 19, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15},
      {0, 1, 2, 3, 20, 21, 22, 23, 8, 9, 10, 11, 12, 13, 14, 15},
      {16, 17, 18, 19, 20, 21, 22, 23, 8, 9, 10, 11, 12, 13, 14, 15},
      {0, 1, 2, 3, 4, 5, 6, 7, 24, 25, 26, 27, 12, 13, 14, 15},
      {16, 17, 18, 19, 4, 5, 6, 7, 24, 25, 26, 27, 12, 13, 14, 15},
      {0, 1, 2, 3, 20, 21, 22, 23, 24, 25, 26, 27, 12, 13, 14, 15},
      {16, 17, 18, 19, 20, 21, 22, 23, 24, 25, 26, 27, 12, 13, 14, 15},
      {0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 28, 29, 30, 31},
      {16, 17, 18, 19, 4, 5, 6, 7, 8, 9, 10, 11, 28, 29, 30, 31},
      {0, 1, 2, 3, 20, 21, 22, 23, 8, 9, 10, 11, 28, 29, 30, 31},
      {16, 17, 18, 19, 20, 21, 22, 23, 8, 9, 10, 11, 28, 29, 30, 31},
      {0, 1, 2, 3, 4, 5, 6, 7, 24, 25, 26, 27, 28, 29, 30, 31},
      {16, 17, 18, 19, 4, 5, 6, 7, 24, 25, 26, 27, 28, 29, 30, 31},
      {0, 1, 2, 3, 20, 21, 22, 23, 24, 25, 26, 27, 28, 29, 30, 31},
      {16, 17, 18, 19, 20, 21, 22, 23, 24, 25, 26, 27, 28, 29, 30, 31},
  };
  __v16qu __r = vec_perm((__v16qu)__A, (__v16qu)__B, __pcv[__imm8]);
  return (__m128)__r;
}
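
/* Example (illustrative only): the immediate indexes the permute-control
   table above, so _mm_blend_ps(__a, __b, 0x5) uses row 5 and takes
   floats 0 and 2 from __b, floats 1 and 3 from __a. */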

extern __inline __m128
    __attribute__((__gnu_inline__, __always_inline__, __artificial__))
    _mm_blendv_ps(__m128 __A, __m128 __B, __m128 __mask) {
#ifdef _ARCH_PWR10
  return (__m128)vec_blendv((__v4sf)__A, (__v4sf)__B, (__v4su)__mask);
#else
  const __v4si __zero = {0};
  const __vector __bool int __boolmask = vec_cmplt((__v4si)__mask, __zero);
  return (__m128)vec_sel((__v4su)__A, (__v4su)__B, (__v4su)__boolmask);
#endif
}

extern __inline __m128d
    __attribute__((__gnu_inline__, __always_inline__, __artificial__))
    _mm_blend_pd(__m128d __A, __m128d __B, const int __imm8) {
  __v16qu __pcv[] = {
      {0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15},
      {16, 17, 18, 19, 20, 21, 22, 23, 8, 9, 10, 11, 12, 13, 14, 15},
      {0, 1, 2, 3, 4, 5, 6, 7, 24, 25, 26, 27, 28, 29, 30, 31},
      {16, 17, 18, 19, 20, 21, 22, 23, 24, 25, 26, 27, 28, 29, 30, 31}};
  __v16qu __r = vec_perm((__v16qu)__A, (__v16qu)__B, __pcv[__imm8]);
  return (__m128d)__r;
}

#ifdef _ARCH_PWR8
extern __inline __m128d
    __attribute__((__gnu_inline__, __always_inline__, __artificial__))
    _mm_blendv_pd(__m128d __A, __m128d __B, __m128d __mask) {
#ifdef _ARCH_PWR10
  return (__m128d)vec_blendv((__v2df)__A, (__v2df)__B, (__v2du)__mask);
#else
  const __v2di __zero = {0};
  const __vector __bool long long __boolmask =
      vec_cmplt((__v2di)__mask, __zero);
  return (__m128d)vec_sel((__v2du)__A, (__v2du)__B, (__v2du)__boolmask);
#endif
}
#endif

extern __inline int
    __attribute__((__gnu_inline__, __always_inline__, __artificial__))
    _mm_testz_si128(__m128i __A, __m128i __B) {
  /* Note: This implementation does NOT set "zero" or "carry" flags. */
  const __v16qu __zero = {0};
  return vec_all_eq(vec_and((__v16qu)__A, (__v16qu)__B), __zero);
}

extern __inline int
    __attribute__((__gnu_inline__, __always_inline__, __artificial__))
    _mm_testc_si128(__m128i __A, __m128i __B) {
  /* Note: This implementation does NOT set "zero" or "carry" flags. */
  const __v16qu __zero = {0};
  const __v16qu __notA = vec_nor((__v16qu)__A, (__v16qu)__A);
  return vec_all_eq(vec_and((__v16qu)__notA, (__v16qu)__B), __zero);
}

extern __inline int
    __attribute__((__gnu_inline__, __always_inline__, __artificial__))
    _mm_testnzc_si128(__m128i __A, __m128i __B) {
  /* Note: This implementation does NOT set "zero" or "carry" flags. */
  return _mm_testz_si128(__A, __B) == 0 && _mm_testc_si128(__A, __B) == 0;
}

#define _mm_test_all_zeros(M, V) _mm_testz_si128((M), (V))

#define _mm_test_all_ones(V) _mm_testc_si128((V), _mm_cmpeq_epi32((V), (V)))

#define _mm_test_mix_ones_zeros(M, V) _mm_testnzc_si128((M), (V))
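
/* Example (illustrative only): these tests return their results directly
   instead of setting x86 ZF/CF flags, so

     if (_mm_testz_si128(__a, __b)) ...

   branches exactly when (__a & __b) is all zeros, and
   _mm_test_all_ones(__v) is nonzero only when every bit of __v is set. */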

#ifdef _ARCH_PWR8
extern __inline __m128i
    __attribute__((__gnu_inline__, __always_inline__, __artificial__))
    _mm_cmpeq_epi64(__m128i __X, __m128i __Y) {
  return (__m128i)vec_cmpeq((__v2di)__X, (__v2di)__Y);
}
#endif

extern __inline __m128i
    __attribute__((__gnu_inline__, __always_inline__, __artificial__))
    _mm_min_epi8(__m128i __X, __m128i __Y) {
  return (__m128i)vec_min((__v16qi)__X, (__v16qi)__Y);
}

extern __inline __m128i
    __attribute__((__gnu_inline__, __always_inline__, __artificial__))
    _mm_min_epu16(__m128i __X, __m128i __Y) {
  return (__m128i)vec_min((__v8hu)__X, (__v8hu)__Y);
}

extern __inline __m128i
    __attribute__((__gnu_inline__, __always_inline__, __artificial__))
    _mm_min_epi32(__m128i __X, __m128i __Y) {
  return (__m128i)vec_min((__v4si)__X, (__v4si)__Y);
}

extern __inline __m128i
    __attribute__((__gnu_inline__, __always_inline__, __artificial__))
    _mm_min_epu32(__m128i __X, __m128i __Y) {
  return (__m128i)vec_min((__v4su)__X, (__v4su)__Y);
}

extern __inline __m128i
    __attribute__((__gnu_inline__, __always_inline__, __artificial__))
    _mm_max_epi8(__m128i __X, __m128i __Y) {
  return (__m128i)vec_max((__v16qi)__X, (__v16qi)__Y);
}

extern __inline __m128i
    __attribute__((__gnu_inline__, __always_inline__, __artificial__))
    _mm_max_epu16(__m128i __X, __m128i __Y) {
  return (__m128i)vec_max((__v8hu)__X, (__v8hu)__Y);
}

extern __inline __m128i
    __attribute__((__gnu_inline__, __always_inline__, __artificial__))
    _mm_max_epi32(__m128i __X, __m128i __Y) {
  return (__m128i)vec_max((__v4si)__X, (__v4si)__Y);
}

extern __inline __m128i
    __attribute__((__gnu_inline__, __always_inline__, __artificial__))
    _mm_max_epu32(__m128i __X, __m128i __Y) {
  return (__m128i)vec_max((__v4su)__X, (__v4su)__Y);
}

extern __inline __m128i
    __attribute__((__gnu_inline__, __always_inline__, __artificial__))
    _mm_mullo_epi32(__m128i __X, __m128i __Y) {
  return (__m128i)vec_mul((__v4su)__X, (__v4su)__Y);
}

#ifdef _ARCH_PWR8
extern __inline __m128i
    __attribute__((__gnu_inline__, __always_inline__, __artificial__))
    _mm_mul_epi32(__m128i __X, __m128i __Y) {
  return (__m128i)vec_mule((__v4si)__X, (__v4si)__Y);
}
#endif

extern __inline __m128i
    __attribute__((__gnu_inline__, __always_inline__, __artificial__))
    _mm_cvtepi8_epi16(__m128i __A) {
  return (__m128i)vec_unpackh((__v16qi)__A);
}

extern __inline __m128i
    __attribute__((__gnu_inline__, __always_inline__, __artificial__))
    _mm_cvtepi8_epi32(__m128i __A) {
  __A = (__m128i)vec_unpackh((__v16qi)__A);
  return (__m128i)vec_unpackh((__v8hi)__A);
}

#ifdef _ARCH_PWR8
extern __inline __m128i
    __attribute__((__gnu_inline__, __always_inline__, __artificial__))
    _mm_cvtepi8_epi64(__m128i __A) {
  __A = (__m128i)vec_unpackh((__v16qi)__A);
  __A = (__m128i)vec_unpackh((__v8hi)__A);
  return (__m128i)vec_unpackh((__v4si)__A);
}
#endif

extern __inline __m128i
    __attribute__((__gnu_inline__, __always_inline__, __artificial__))
    _mm_cvtepi16_epi32(__m128i __A) {
  return (__m128i)vec_unpackh((__v8hi)__A);
}

#ifdef _ARCH_PWR8
extern __inline __m128i
    __attribute__((__gnu_inline__, __always_inline__, __artificial__))
    _mm_cvtepi16_epi64(__m128i __A) {
  __A = (__m128i)vec_unpackh((__v8hi)__A);
  return (__m128i)vec_unpackh((__v4si)__A);
}
#endif

#ifdef _ARCH_PWR8
extern __inline __m128i
    __attribute__((__gnu_inline__, __always_inline__, __artificial__))
    _mm_cvtepi32_epi64(__m128i __A) {
  return (__m128i)vec_unpackh((__v4si)__A);
}
#endif

extern __inline __m128i
    __attribute__((__gnu_inline__, __always_inline__, __artificial__))
    _mm_cvtepu8_epi16(__m128i __A) {
  const __v16qu __zero = {0};
#ifdef __LITTLE_ENDIAN__
  __A = (__m128i)vec_mergeh((__v16qu)__A, __zero);
#else /* __BIG_ENDIAN__. */
  __A = (__m128i)vec_mergeh(__zero, (__v16qu)__A);
#endif /* __BIG_ENDIAN__. */
  return __A;
}

extern __inline __m128i
    __attribute__((__gnu_inline__, __always_inline__, __artificial__))
    _mm_cvtepu8_epi32(__m128i __A) {
  const __v16qu __zero = {0};
#ifdef __LITTLE_ENDIAN__
  __A = (__m128i)vec_mergeh((__v16qu)__A, __zero);
  __A = (__m128i)vec_mergeh((__v8hu)__A, (__v8hu)__zero);
#else /* __BIG_ENDIAN__. */
  __A = (__m128i)vec_mergeh(__zero, (__v16qu)__A);
  __A = (__m128i)vec_mergeh((__v8hu)__zero, (__v8hu)__A);
#endif /* __BIG_ENDIAN__. */
  return __A;
}

extern __inline __m128i
    __attribute__((__gnu_inline__, __always_inline__, __artificial__))
    _mm_cvtepu8_epi64(__m128i __A) {
  const __v16qu __zero = {0};
#ifdef __LITTLE_ENDIAN__
  __A = (__m128i)vec_mergeh((__v16qu)__A, __zero);
  __A = (__m128i)vec_mergeh((__v8hu)__A, (__v8hu)__zero);
  __A = (__m128i)vec_mergeh((__v4su)__A, (__v4su)__zero);
#else /* __BIG_ENDIAN__. */
  __A = (__m128i)vec_mergeh(__zero, (__v16qu)__A);
  __A = (__m128i)vec_mergeh((__v8hu)__zero, (__v8hu)__A);
  __A = (__m128i)vec_mergeh((__v4su)__zero, (__v4su)__A);
#endif /* __BIG_ENDIAN__. */
  return __A;
}

extern __inline __m128i
    __attribute__((__gnu_inline__, __always_inline__, __artificial__))
    _mm_cvtepu16_epi32(__m128i __A) {
  const __v8hu __zero = {0};
#ifdef __LITTLE_ENDIAN__
  __A = (__m128i)vec_mergeh((__v8hu)__A, __zero);
#else /* __BIG_ENDIAN__. */
  __A = (__m128i)vec_mergeh(__zero, (__v8hu)__A);
#endif /* __BIG_ENDIAN__. */
  return __A;
}

extern __inline __m128i
    __attribute__((__gnu_inline__, __always_inline__, __artificial__))
    _mm_cvtepu16_epi64(__m128i __A) {
  const __v8hu __zero = {0};
#ifdef __LITTLE_ENDIAN__
  __A = (__m128i)vec_mergeh((__v8hu)__A, __zero);
  __A = (__m128i)vec_mergeh((__v4su)__A, (__v4su)__zero);
#else /* __BIG_ENDIAN__. */
  __A = (__m128i)vec_mergeh(__zero, (__v8hu)__A);
  __A = (__m128i)vec_mergeh((__v4su)__zero, (__v4su)__A);
#endif /* __BIG_ENDIAN__. */
  return __A;
}

extern __inline __m128i
    __attribute__((__gnu_inline__, __always_inline__, __artificial__))
    _mm_cvtepu32_epi64(__m128i __A) {
  const __v4su __zero = {0};
#ifdef __LITTLE_ENDIAN__
  __A = (__m128i)vec_mergeh((__v4su)__A, __zero);
#else /* __BIG_ENDIAN__. */
  __A = (__m128i)vec_mergeh(__zero, (__v4su)__A);
#endif /* __BIG_ENDIAN__. */
  return __A;
}
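
/* Note on the widening conversions above (illustrative example): the
   unsigned forms zero-extend by merging with a zero vector, with the
   merge order flipped between endiannesses, so for a vector whose low
   two words are {1, 2}, _mm_cvtepu32_epi64 yields the unsigned 64-bit
   values {1, 2} on either endianness. */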

/* Return horizontal packed word minimum and its index in bits [15:0]
   and bits [18:16] respectively. */
extern __inline __m128i
    __attribute__((__gnu_inline__, __always_inline__, __artificial__))
    _mm_minpos_epu16(__m128i __A) {
  union __u {
    __m128i __m;
    __v8hu __uh;
  };
  union __u __u = {.__m = __A}, __r = {.__m = {0}};
  unsigned short __ridx = 0;
  unsigned short __rmin = __u.__uh[__ridx];
  unsigned long __i;
  for (__i = 1; __i < 8; __i++) {
    if (__u.__uh[__i] < __rmin) {
      __rmin = __u.__uh[__i];
      __ridx = __i;
    }
  }
  __r.__uh[0] = __rmin;
  __r.__uh[1] = __ridx;
  return __r.__m;
}
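
/* Usage sketch for _mm_minpos_epu16 (illustrative only):

     __m128i __v = _mm_set_epi16(7, 7, 7, 7, 7, 3, 7, 7);
     __m128i __m = _mm_minpos_epu16(__v);

   The minimum 3 lands in halfword 0 and its index 2 in halfword 1, so
   _mm_extract_epi16(__m, 0) == 3 and _mm_extract_epi16(__m, 1) == 2. */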

extern __inline __m128i
    __attribute__((__gnu_inline__, __always_inline__, __artificial__))
    _mm_packus_epi32(__m128i __X, __m128i __Y) {
  return (__m128i)vec_packsu((__v4si)__X, (__v4si)__Y);
}

#ifdef _ARCH_PWR8
extern __inline __m128i
    __attribute__((__gnu_inline__, __always_inline__, __artificial__))
    _mm_cmpgt_epi64(__m128i __X, __m128i __Y) {
  return (__m128i)vec_cmpgt((__v2di)__X, (__v2di)__Y);
}
#endif

#else
#include_next <smmintrin.h>
#endif /* defined(__powerpc64__) &&                                            \
        * (defined(__linux__) || defined(__FreeBSD__) || defined(_AIX)) */

#endif /* SMMINTRIN_H_ */