Warning: This file is not a C or C++ file. It does not have highlighting.
1 | /*===---- pmmintrin.h - Implementation of SSE3 intrinsics on PowerPC -------=== |
---|---|
2 | * |
3 | * Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions. |
4 | * See https://llvm.org/LICENSE.txt for license information. |
5 | * SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception |
6 | * |
7 | *===-----------------------------------------------------------------------=== |
8 | */ |
9 | |
10 | /* Implemented from the specification included in the Intel C++ Compiler |
11 | User Guide and Reference, version 9.0. */ |
12 | |
13 | #ifndef NO_WARN_X86_INTRINSICS |
14 | /* This header is distributed to simplify porting x86_64 code that |
15 | makes explicit use of Intel intrinsics to powerpc64le. |
16 | It is the user's responsibility to determine if the results are |
17 | acceptable and make additional changes as necessary. |
18 | Note that much code that uses Intel intrinsics can be rewritten in |
19 | standard C or GNU C extensions, which are more portable and better |
20 | optimized across multiple targets. |
21 | |
22 | In the specific case of X86 SSE3 intrinsics, the PowerPC VMX/VSX ISA |
23 | is a good match for most SIMD operations. However the Horizontal |
24 | add/sub requires the data pairs be permuted into separate |
25 | registers with vertical even/odd alignment for the operation. |
26 | And the addsub operation requires the sign of only the even numbered |
27 | elements be flipped (xored with -0.0). |
28 | For larger blocks of code using these intrinsic implementations, |
29 | the compiler should be able to schedule instructions to avoid |
30 | additional latency. |
31 | |
32 | In the specific case of the monitor and mwait instructions there is |
33 | no direct equivalent in the PowerISA at this time. So those |
34 | intrinsics are not implemented. */ |
35 | #error \ |
36 | "Please read comment above. Use -DNO_WARN_X86_INTRINSICS to disable this warning." |
37 | #endif |
38 | |
39 | #ifndef PMMINTRIN_H_ |
40 | #define PMMINTRIN_H_ |
41 | |
42 | #if defined(__powerpc64__) && \ |
43 | (defined(__linux__) || defined(__FreeBSD__) || defined(_AIX)) |
44 | |
45 | /* We need definitions from the SSE2 and SSE header files. */ |
46 | #include <emmintrin.h> |
47 | |
48 | extern __inline __m128 |
49 | __attribute__((__gnu_inline__, __always_inline__, __artificial__)) |
50 | _mm_addsub_ps(__m128 __X, __m128 __Y) { |
51 | const __v4sf __even_n0 = {-0.0, 0.0, -0.0, 0.0}; |
52 | __v4sf __even_neg_Y = vec_xor(__Y, __even_n0); |
53 | return (__m128)vec_add(__X, __even_neg_Y); |
54 | } |
55 | |
56 | extern __inline __m128d |
57 | __attribute__((__gnu_inline__, __always_inline__, __artificial__)) |
58 | _mm_addsub_pd(__m128d __X, __m128d __Y) { |
59 | const __v2df __even_n0 = {-0.0, 0.0}; |
60 | __v2df __even_neg_Y = vec_xor(__Y, __even_n0); |
61 | return (__m128d)vec_add(__X, __even_neg_Y); |
62 | } |
63 | |
64 | extern __inline __m128 |
65 | __attribute__((__gnu_inline__, __always_inline__, __artificial__)) |
66 | _mm_hadd_ps(__m128 __X, __m128 __Y) { |
67 | __vector unsigned char __xform2 = {0x00, 0x01, 0x02, 0x03, 0x08, 0x09, |
68 | 0x0A, 0x0B, 0x10, 0x11, 0x12, 0x13, |
69 | 0x18, 0x19, 0x1A, 0x1B}; |
70 | __vector unsigned char __xform1 = {0x04, 0x05, 0x06, 0x07, 0x0C, 0x0D, |
71 | 0x0E, 0x0F, 0x14, 0x15, 0x16, 0x17, |
72 | 0x1C, 0x1D, 0x1E, 0x1F}; |
73 | return (__m128)vec_add(vec_perm((__v4sf)__X, (__v4sf)__Y, __xform2), |
74 | vec_perm((__v4sf)__X, (__v4sf)__Y, __xform1)); |
75 | } |
76 | |
77 | extern __inline __m128 |
78 | __attribute__((__gnu_inline__, __always_inline__, __artificial__)) |
79 | _mm_hsub_ps(__m128 __X, __m128 __Y) { |
80 | __vector unsigned char __xform2 = {0x00, 0x01, 0x02, 0x03, 0x08, 0x09, |
81 | 0x0A, 0x0B, 0x10, 0x11, 0x12, 0x13, |
82 | 0x18, 0x19, 0x1A, 0x1B}; |
83 | __vector unsigned char __xform1 = {0x04, 0x05, 0x06, 0x07, 0x0C, 0x0D, |
84 | 0x0E, 0x0F, 0x14, 0x15, 0x16, 0x17, |
85 | 0x1C, 0x1D, 0x1E, 0x1F}; |
86 | return (__m128)vec_sub(vec_perm((__v4sf)__X, (__v4sf)__Y, __xform2), |
87 | vec_perm((__v4sf)__X, (__v4sf)__Y, __xform1)); |
88 | } |
89 | |
90 | extern __inline __m128d |
91 | __attribute__((__gnu_inline__, __always_inline__, __artificial__)) |
92 | _mm_hadd_pd(__m128d __X, __m128d __Y) { |
93 | return (__m128d)vec_add(vec_mergeh((__v2df)__X, (__v2df)__Y), |
94 | vec_mergel((__v2df)__X, (__v2df)__Y)); |
95 | } |
96 | |
97 | extern __inline __m128d |
98 | __attribute__((__gnu_inline__, __always_inline__, __artificial__)) |
99 | _mm_hsub_pd(__m128d __X, __m128d __Y) { |
100 | return (__m128d)vec_sub(vec_mergeh((__v2df)__X, (__v2df)__Y), |
101 | vec_mergel((__v2df)__X, (__v2df)__Y)); |
102 | } |
103 | |
#ifdef _ARCH_PWR8
/* SSE3 MOVSHDUP replacement, only defined when _ARCH_PWR8 is set.
   Reinterprets __X as four unsigned words and merges the vector with itself
   through vec_mergeo.  */
extern __inline __m128
    __attribute__((__gnu_inline__, __always_inline__, __artificial__))
    _mm_movehdup_ps(__m128 __X) {
  __v4su __words = (__v4su)__X;
  return (__m128)vec_mergeo(__words, __words);
}
#endif
111 | |
#ifdef _ARCH_PWR8
/* SSE3 MOVSLDUP replacement, only defined when _ARCH_PWR8 is set.
   Reinterprets __X as four unsigned words and merges the vector with itself
   through vec_mergee.  */
extern __inline __m128
    __attribute__((__gnu_inline__, __always_inline__, __artificial__))
    _mm_moveldup_ps(__m128 __X) {
  __v4su __words = (__v4su)__X;
  return (__m128)vec_mergee(__words, __words);
}
#endif
119 | |
120 | extern __inline __m128d |
121 | __attribute__((__gnu_inline__, __always_inline__, __artificial__)) |
122 | _mm_loaddup_pd(double const *__P) { |
123 | return (__m128d)vec_splats(*__P); |
124 | } |
125 | |
126 | extern __inline __m128d |
127 | __attribute__((__gnu_inline__, __always_inline__, __artificial__)) |
128 | _mm_movedup_pd(__m128d __X) { |
129 | return _mm_shuffle_pd(__X, __X, _MM_SHUFFLE2(0, 0)); |
130 | } |
131 | |
132 | extern __inline __m128i |
133 | __attribute__((__gnu_inline__, __always_inline__, __artificial__)) |
134 | _mm_lddqu_si128(__m128i const *__P) { |
135 | return (__m128i)(vec_vsx_ld(0, (signed int const *)__P)); |
136 | } |
137 | |
138 | /* POWER8 / POWER9 have no equivalent for _mm_monitor or _mm_mwait. */ |
139 | |
140 | #else |
141 | #include_next <pmmintrin.h> |
142 | #endif /* defined(__powerpc64__) && \ |
143 | * (defined(__linux__) || defined(__FreeBSD__) || defined(_AIX)) */ |
144 | |
145 | #endif /* PMMINTRIN_H_ */ |
146 |
Warning: This file is not a C or C++ file. It does not have highlighting.